From ecac37fbd64c83bfc8d045ae3204ab86dc7bc29d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 2 May 2023 10:52:13 -0400 Subject: [PATCH 001/414] initial Globus Store class with some quick test code --- pom.xml | 7 +- .../dataaccess/GlobusOverlayAccessIO.java | 655 ++++++++++++++++++ .../dataaccess/RemoteOverlayAccessIO.java | 34 +- .../iq/dataverse/settings/JvmSettings.java | 2 + 4 files changed, 680 insertions(+), 18 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java diff --git a/pom.xml b/pom.xml index 5f514819947..e5b191f0ed7 100644 --- a/pom.xml +++ b/pom.xml @@ -167,8 +167,13 @@ org.eclipse.microprofile.config microprofile-config-api - provided + + + org.apache.geronimo.config + geronimo-config-impl + 1.0 + jakarta.platform jakarta.jakartaee-api diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java new file mode 100644 index 00000000000..fe62e25ad6f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -0,0 +1,655 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import javax.net.ssl.SSLContext; + +/** + * @author qqmyers + * @param what it stores + */ +/* + * Globus Overlay Driver + * + * StorageIdentifier format: + * :///// + */ +public class GlobusOverlayAccessIO extends StorageIO { + + private static final Logger logger = 
Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); + + private StorageIO baseStore = null; + private String path = null; + private String endpointWithBasePath = null; + + private static HttpClientContext localContext = HttpClientContext.create(); + private PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + private int timeout = 1200; + private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + private static boolean trustCerts = false; + private int httpConcurrency = 4; + + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + this.setIsLocalFile(false); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); + } + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + super(null, null, driverId); + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); + } + + private void validatePath(String relPath) throws IOException { + try { + URI absoluteURI = new URI(endpointWithBasePath + "/" + relPath); + if(!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); + } + } catch(URISyntaxException use) { + throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); + } + } + + + @Override + public void open(DataAccessOption... options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + // Fix new DataFiles: DataFiles that have not yet been saved may use this method + // when they don't have their storageidentifier in the final form + // So we fix it up here. ToDo: refactor so that storageidentifier is generated + // by the appropriate StorageIO class and is final from the start. 
+ logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(getSizeFromGlobus()); + } + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else { + this.setSize(getSizeFromGlobus()); + } + } + + private long getSizeFromGlobus() { + throw new NotImplementedException(); + /* + long size = -1; + HttpHead head = new HttpHead(endpointWithBasePath + "/" + path); + try { + CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); + + try { + int code = response.getStatusLine().getStatusCode(); + logger.fine("Response for HEAD: " + code); + switch (code) { + case 200: + Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); + logger.fine("Num headers: " + headers.length); + String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + logger.fine("Content-Length: " + sizeString); + size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + logger.fine("Found file size: " + size); + break; + default: + logger.warning("Response from " + head.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (IOException e) { + logger.warning(e.getMessage()); + } + return size; + */ + } + + @Override + public InputStream getInputStream() throws IOException { + if (super.getInputStream() == null) { + try { + HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + setInputStream(response.getEntity().getContent()); + break; + default: + logger.warning("Response from " + get.getURI().toString() + " was " + code); + throw new IOException("Cannot retrieve: " + endpointWithBasePath + "/" + path + " code: " + code); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + e.printStackTrace(); + throw new IOException("Error retrieving: " + endpointWithBasePath + "/" + path + " " + e.getMessage()); + + } + setChannel(Channels.newChannel(super.getInputStream())); + } + return super.getInputStream(); + } + + @Override + public Channel getChannel() throws IOException { + if (super.getChannel() == null) { + getInputStream(); + } + return channel; + } + + @Override + public ReadableByteChannel getReadChannel() throws IOException { + // Make sure StorageIO.channel variable exists + getChannel(); + return super.getReadChannel(); + } + + @Override + public void delete() throws IOException { + // Delete is best-effort - we tell the remote 
server and it may or may not + // implement this call + if (!isDirectAccess()) { + throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); + } + try { + HttpDelete del = new HttpDelete(endpointWithBasePath + "/" + path); + CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); + try { + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + logger.fine("Sent DELETE for " + endpointWithBasePath + "/" + path); + default: + logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + throw new IOException("Error deleting: " + endpointWithBasePath + "/" + path); + + } + + // Delete all the cached aux files as well: + deleteAllAuxObjects(); + + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. 
+ */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + + @Override + public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); + } + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (getSizeFromGlobus() != -1); + } + + @Override + public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); + } + + @Override + public OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + @Override + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return baseStore.downloadRedirectEnabled(auxObjectTag); + } + + @Override + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { + + // ToDo - support remote auxiliary Files + if (auxiliaryTag == null) { + String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secret-key"); + if (secretKey == null) { + return endpointWithBasePath + "/" + path; + } else { + return UrlSignerUtil.signUrl(endpointWithBasePath + "/" + path, getUrlExpirationMinutes(), null, "GET", + secretKey); + } + } else { + return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + } + } + + int getUrlExpirationMinutes() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + logger.info("base-uri is " + endpointWithBasePath); + if (endpointWithBasePath == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); + } else { + try { + new URI(endpointWithBasePath); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if(dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." + baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + + } else if (storageLocation != null) { + // ://// + //remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if(index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + //THe base store needs the baseStoreIdentifier and not the relative URL + fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); + try { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch(MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + private String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if(suffixIndex >=0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (this.getDvObject() instanceof Dataset) { + fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (this.getDvObject() instanceof DataFile) { + fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + }else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + + } + + protected static boolean isValidIdentifier(String driverId, String storageId) { + String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUri = System.getProperty("dataverse.files." + driverId + ".base-uri"); + try { + URI absoluteURI = new URI(baseUri + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUri)) { + logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); + return false; + } + } catch(URISyntaxException use) { + logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); + logger.warning(use.getLocalizedMessage()); + return false; + } + return true; + } + + public static String getBaseStoreIdFor(String driverId) { + return System.getProperty("dataverse.files." + driverId + ".base-store"); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + public static void main(String[] args) { + System.out.println("Running the main method"); + if (args.length > 0) { + System.out.printf("List of arguments: {}", Arrays.toString(args)); + } + System.setProperty("dataverse.files.globus.base-uri", "12345/top"); + System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + logger.info(JvmSettings.BASE_URI.lookup("globus")); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 66c6a4cc2ee..ee2b6779cba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,7 +65,7 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); private StorageIO baseStore = null; - private String urlPath = null; + private String path = null; private String baseUrl = null; private static HttpClientContext localContext = HttpClientContext.create(); @@ -83,10 +83,10 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) this.setIsLocalFile(false); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(urlPath); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); - logger.fine("Base URL: " + urlPath); + logger.fine("Base URL: " + path); } public 
RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { @@ -94,14 +94,14 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE this.setIsLocalFile(false); configureStores(null, driverId, storageLocation); - urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(urlPath); - logger.fine("Base URL: " + urlPath); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Base URL: " + path); } - private void validatePath(String path) throws IOException { + private void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); + URI absoluteURI = new URI(baseUrl + "/" + relPath); if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } @@ -182,7 +182,7 @@ public void open(DataAccessOption... options) throws IOException { private long getSizeFromHttpHeader() { long size = -1; - HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + HttpHead head = new HttpHead(baseUrl + "/" + path); try { CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); @@ -224,12 +224,12 @@ public InputStream getInputStream() throws IOException { break; default: logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + path + " code: " + code); } } catch (Exception e) { logger.warning(e.getMessage()); e.printStackTrace(); - throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); + throw new IOException("Error retrieving: " + baseUrl + "/" + path + " " + e.getMessage()); } setChannel(Channels.newChannel(super.getInputStream())); @@ -260,13 +260,13 @@ public void delete() throws IOException { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -275,7 +275,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -420,9 +420,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (auxiliaryTag == null) { String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secret-key"); if (secretKey == null) { - return baseUrl + "/" + urlPath; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 86130f5146e..4fb895f5adc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -47,6 +47,8 @@ public enum JvmSettings { // FILES SETTINGS SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), + FILES(SCOPE_FILES), + BASE_URI(FILES, "base-uri"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 2c4c927cc8f20d53ee1aaaf1979b793ee53f9b3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 May 2023 14:13:02 -0400 Subject: [PATCH 002/414] add token --- .../dataaccess/GlobusOverlayAccessIO.java | 171 +++++++++++------- .../iq/dataverse/settings/JvmSettings.java | 1 + 2 files changed, 111 insertions(+), 61 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index fe62e25ad6f..050b9ddc176 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.FileNotFoundException; import java.io.IOException; @@ -31,6 +32,7 @@ import java.util.logging.Logger; import org.apache.commons.lang3.NotImplementedException; +import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; @@ -49,6 +51,7 @@ import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; +import javax.json.JsonObject; import javax.net.ssl.SSLContext; /** @@ -58,8 +61,8 @@ /* * Globus Overlay Driver * - * StorageIdentifier format: - * :///// + * StorageIdentifier format: :///// */ public class GlobusOverlayAccessIO extends StorageIO { @@ -68,6 +71,7 @@ public class GlobusOverlayAccessIO extends StorageIO { private StorageIO baseStore = null; private String path = null; private String endpointWithBasePath = null; + private String globusToken = null; private static HttpClientContext localContext = HttpClientContext.create(); private PoolingHttpClientConnectionManager cm = null; @@ -86,7 +90,7 @@ public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - + logger.fine("Relative path: " + path); } @@ -99,18 +103,17 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE validatePath(path); logger.fine("Relative path: " + path); } - + private void validatePath(String relPath) throws IOException { try { URI absoluteURI = new 
URI(endpointWithBasePath + "/" + relPath); - if(!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { + if (!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -181,37 +184,64 @@ public void open(DataAccessOption... options) throws IOException { } } + // Call the Globus API to get the file size private long getSizeFromGlobus() { - throw new NotImplementedException(); - /* - long size = -1; - HttpHead head = new HttpHead(endpointWithBasePath + "/" + path); + // Construct Globus URL + URI absoluteURI = null; try { - CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); - - try { - int code = response.getStatusLine().getStatusCode(); - logger.fine("Response for HEAD: " + code); - switch (code) { - case 200: - Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); - logger.fine("Num headers: " + headers.length); - String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); - logger.fine("Content-Length: " + sizeString); - size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); - logger.fine("Found file size: " + size); - break; - default: - logger.warning("Response from " + head.getURI().toString() + " was " + code); - } - } finally { - EntityUtils.consume(response.getEntity()); + int filenameStart = path.lastIndexOf("/") + 1; + int pathStart = endpointWithBasePath.indexOf("/") + 1; + + String directoryPath = (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + + path.substring(0, filenameStart); + String filename = path.substring(filenameStart); + String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; + + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + path + "&filter=name:" + filename); + HttpGet get = new HttpGet(absoluteURI); + String token = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + logger.info("Token is " + token); + get.addHeader("Authorization", "Bearer " + token); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + if (response.getStatusLine().getStatusCode() == 200) { + //Get reponse as string + String responseString = EntityUtils.toString(response.getEntity()); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); + JsonObject responseJson = JsonUtil.getJsonObject(responseString); + return (long) responseJson.getInt("size"); + } else { + logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(response.getEntity())); } + } catch (URISyntaxException e) { + // Should have been caught in validatePath + e.printStackTrace(); + } catch (ClientProtocolException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } catch (IOException e) { - logger.warning(e.getMessage()); + // TODO Auto-generated catch block + e.printStackTrace(); } - return size; - */ + return -1; + + /* + * long size = -1; HttpHead head = new HttpHead(endpointWithBasePath + "/" + + * path); try { CloseableHttpResponse response = + * getSharedHttpClient().execute(head, localContext); + * + * try { int code = response.getStatusLine().getStatusCode(); + * logger.fine("Response for HEAD: " + code); switch (code) { case 200: Header[] + * headers = response.getHeaders(HTTP.CONTENT_LEN); logger.fine("Num headers: " + * + headers.length); String sizeString = + * response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + * logger.fine("Content-Length: " + sizeString); size = + * Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + * logger.fine("Found file size: " + size); break; default: + * logger.warning("Response from " + head.getURI().toString() + " was " + code); + * } } finally { EntityUtils.consume(response.getEntity()); } } catch + * (IOException e) { logger.warning(e.getMessage()); } return size; + */ } @Override @@ -360,8 +390,9 @@ public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } if (this.getDvObject() instanceof Dataset) { throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); @@ -411,7 +442,7 @@ public boolean downloadRedirectEnabled() { } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -469,9 +500,10 @@ private void configureStores(DataAccessRequest req, String driverId, String stor if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = 
System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { if (this.getDvObject() != null) { @@ -486,8 +518,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -497,12 +529,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - //remoteDriverId:// is removed if coming through directStorageIO + // remoteDriverId:// is removed if coming through directStorageIO int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { + if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - //THe base store needs the baseStoreIdentifier and not the relative URL + // THe base store needs the baseStoreIdentifier and not the relative URL fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); switch (baseDriverType) { @@ -513,8 +545,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -530,37 +562,41 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch (MalformedURLException mfue) { logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + // Convenience method to assemble the path, starting with the DOI + // authority/identifier/, that is needed to create a base store via + // DataAccess.getDirectStorageIO - the caller has to add the store type specific + // prefix required. 
private String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); } if (this.getDvObject() instanceof Dataset) { fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; } else if (this.getDvObject() instanceof DataFile) { fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } - + public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { try { @@ -622,11 +658,11 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { String baseUri = System.getProperty("dataverse.files." 
+ driverId + ".base-uri"); try { URI absoluteURI = new URI(baseUri + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUri)) { + if (!absoluteURI.normalize().toString().startsWith(baseUri)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); logger.warning(use.getLocalizedMessage()); return false; @@ -642,14 +678,27 @@ public static String getBaseStoreIdFor(String driverId) { public List cleanUp(Predicate filter, boolean dryRun) throws IOException { return baseStore.cleanUp(filter, dryRun); } - + public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { System.out.printf("List of arguments: {}", Arrays.toString(args)); } - System.setProperty("dataverse.files.globus.base-uri", "12345/top"); + System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOlprRmxGejNTWDlkTVpUNk92ZmVJaFQyTWY0SDd4cXBoTDNSS29vUmRGVlE9"); + System.setProperty("dataverse.files.globus.base-store","file"); + System.setProperty("dataverse.files.file.type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); + try { + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234//2791b83e-b989-47c5-a7fa-ce65fd949522/hdc1/image001.mrc", "globus"); + logger.info("Size is " + gsio.getSizeFromGlobus()); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 4fb895f5adc..eac8411c939 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -49,6 +49,7 @@ public enum JvmSettings { FILES_DIRECTORY(SCOPE_FILES, "directory"), FILES(SCOPE_FILES), BASE_URI(FILES, "base-uri"), + GLOBUS_TOKEN(FILES, "globus-token"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 3c3378f5a3bf39eff13a582d0dc52a2a5549af8f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 May 2023 14:53:25 -0400 Subject: [PATCH 003/414] start refactoring Globus bean --- .../dataaccess/GlobusOverlayAccessIO.java | 28 +++++++++----- .../iq/dataverse/globus/AccessToken.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 37 +++++++++++-------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 050b9ddc176..0d7c5458e14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.globus.AccessToken; +import 
edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -28,10 +30,8 @@ import java.util.Arrays; import java.util.List; import java.util.function.Predicate; -import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang3.NotImplementedException; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; @@ -83,6 +83,8 @@ public class GlobusOverlayAccessIO extends StorageIO { private static boolean trustCerts = false; private int httpConcurrency = 4; + private String globusAccessToken = null; + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); @@ -190,18 +192,19 @@ private long getSizeFromGlobus() { URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; - int pathStart = endpointWithBasePath.indexOf("/") + 1; - - String directoryPath = (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + int pathStart = endpointWithBasePath.indexOf("/"); +logger.info("endpointWithBasePath: " + endpointWithBasePath); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + path.substring(0, filenameStart); + logger.info("directoryPath: " + directoryPath); String filename = path.substring(filenameStart); String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; - absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + path + "&filter=name:" + filename); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - String token = JvmSettings.GLOBUS_TOKEN.lookup(driverId); - logger.info("Token is " + token); - get.addHeader("Authorization", "Bearer " + token); + + logger.info("Token is " + globusAccessToken); + get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { //Get reponse as string @@ -482,6 +485,8 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + AccessToken accessToken = GlobusServiceBean.getClientToken(JvmSettings.GLOBUS_TOKEN.lookup(driverId)); + globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); logger.info("base-uri is " + endpointWithBasePath); if (endpointWithBasePath == null) { @@ -692,8 +697,11 @@ public static void main(String[] args) { DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); + + + try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234//2791b83e-b989-47c5-a7fa-ce65fd949522/hdc1/image001.mrc", "globus"); + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234///hdc1/image001.mrc", "globus"); logger.info("Size is " + gsio.getSizeFromGlobus()); } catch (IOException e) { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java index 877fc68e4a1..c93e2c6aa94 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -46,7 +46,7 @@ String getRefreshToken() { return refreshToken; } - ArrayList getOtherTokens() { + public ArrayList getOtherTokens() { return otherTokens; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9d80c5cc280..c2137dd1f47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -167,7 +167,8 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { if (ruleId.length() > 0) { - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); + globusLogger.info("Start deleting permissions."); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); @@ -264,15 +265,21 @@ public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger glo return task; } - public AccessToken getClientToken() throws MalformedURLException { - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - URL url = new URL( - "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + public static AccessToken getClientToken(String globusBasicToken) { + URL url; AccessToken clientTokenUser = null; - if (result.status == 200) { - clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + + try { + url = new URL( + "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); + + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + } catch (MalformedURLException e) { + // On a statically defined URL... 
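The getClientToken refactor above makes the method static and parameterizes the Globus Basic token, so the overlay driver can fetch a transfer token without going through the settings bean. Under the hood this is a plain OAuth2 client_credentials exchange against auth.globus.org; a minimal sketch with java.net.http (the bean itself goes through its makeRequest helper and parses the JSON into AccessToken with Gson):

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    // Sketch: exchange a Basic (client_id:client_secret) credential for
    // transfer-scoped access tokens; returns the raw JSON token response.
    static String clientCredentialsExchange(String globusBasicToken) throws Exception {
        HttpRequest request = HttpRequest.newBuilder(URI.create(
                "https://auth.globus.org/v2/oauth2/token"
                + "?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all"
                + "&grant_type=client_credentials"))
                .header("Authorization", "Basic " + globusBasicToken)
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
        return HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString()).body();
    }

The overlay driver then takes getOtherTokens().get(0).getAccessToken() from the parsed response as its transfer-API bearer token, which is why AccessToken.getOtherTokens() is widened to public in this same commit.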
+ e.printStackTrace(); } return clientTokenUser; } @@ -306,7 +313,7 @@ public AccessToken getAccessToken(HttpServletRequest origRequest, String globusB } - public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + public static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -359,7 +366,7 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode } - private StringBuilder readResultJson(InputStream in) { + private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; try { @@ -378,7 +385,7 @@ private StringBuilder readResultJson(InputStream in) { return sb; } - private T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { + private static T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { if (sb != null) { Gson gson = null; if (namingPolicy) { @@ -420,7 +427,7 @@ public String getDirectory(String datasetId) { } - class MakeRequestResponse { + static class MakeRequestResponse { public String jsonResponse; public int status; @@ -451,7 +458,7 @@ public boolean giveGlobusPublicPermissions(String datasetId) if (globusEndpoint.equals("") || globusBasicToken.equals("")) { return false; } - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); if (clientTokenUser == null) { logger.severe("Cannot get client token "); return false; @@ -908,7 +915,7 @@ private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); task = getTask(clientTokenUser, taskId, globusLogger); if (task != null) { From f14b75454a524fd8816d6f5367b0e15fbd0ded92 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 May 2023 14:53:56 -0400 Subject: [PATCH 004/414] enable globus store main() to run - will revert --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e5b191f0ed7..4926f59f8a0 100644 --- a/pom.xml +++ b/pom.xml @@ -184,7 +184,7 @@ org.glassfish jakarta.json - provided + From 502e660fe342939a617edd6d17a425c83b5a269b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 May 2023 13:22:46 -0400 Subject: [PATCH 005/414] suppress thumb generation after a failure --- .../edu/harvard/iq/dataverse/DvObject.java | 14 +++++ .../dataaccess/ImageThumbConverter.java | 55 ++++++++++++------- .../dataverse/ingest/IngestServiceBean.java | 4 +- .../V5.13.0.1__9506-track-thumb-failures.sql | 1 + 4 files changed, 54 insertions(+), 20 deletions(-) create mode 100644 src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 854888737ee..6cb3816e3f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -181,7 +181,20 @@ public boolean isPreviewImageAvailable() { public void setPreviewImageAvailable(boolean status) { this.previewImageAvailable = status; } + + 
/** Indicates whether a previous attempt to generate a preview image has failed, regardless of size. + * If so, we won't want to try again every time the preview/thumbnail is requested for a view. + */ + private boolean previewsHaveFailed; + + public boolean isPreviewsHaveFailed() { + return previewsHaveFailed; + } + public void setPreviewsHaveFailed(boolean previewsHaveFailed) { + this.previewsHaveFailed = previewsHaveFailed; + } + public Timestamp getModificationTime() { return modificationTime; } @@ -462,6 +475,7 @@ public void setStorageIdentifier(String storageIdentifier) { */ public abstract boolean isAncestorOf( DvObject other ); + @OneToMany(mappedBy = "definitionPoint",cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST}, orphanRemoval=true) List roleAssignments; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 2b4aed3a9a5..eb08646454d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -48,6 +48,7 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.io.IOUtils; //import org.primefaces.util.Base64; @@ -110,15 +111,24 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (isThumbnailCached(storageIO, size)) { + logger.fine("Found cached thumbnail for " + file.getId()); return true; } - logger.fine("Checking for thumbnail, file type: " + file.getContentType()); - - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - return generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - return generatePDFThumbnail(storageIO, size); + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + "to generate thumbnail, file id: " + file.getId()); + // Don't try to generate if there have been failures: + if (!file.isPreviewsHaveFailed()) { + boolean thumbnailGenerated = false; + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + thumbnailGenerated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + thumbnailGenerated = generatePDFThumbnail(storageIO, size); + } + if (!thumbnailGenerated) { + logger.fine("No thumbnail generated for " + file.getId()); + file.setPreviewGenerationHasPreviouslyFailed(true); + } + return thumbnailGenerated; } return false; @@ -436,20 +446,27 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (cachedThumbnailChannel == null) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); - // try to generate, if not available: - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } + // try to generate, if not available and hasn't failed before + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + "to generate base64 thumbnail, file id: " + file.getId()); + if (!file.isPreviewsHaveFailed()) { + boolean generated = false; + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + generated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + generated = generatePDFThumbnail(storageIO, size); + } - if (generated) { - // try to open again: - try { - cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); - } catch (Exception ioEx) { - cachedThumbnailChannel = null; + if (!generated) { + // Record failure + logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); + file.setPreviewGenerationHasPreviouslyFailed(true); + } else { + // Success - try to open again: + try { + cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); + } catch (Exception ioEx) { + cachedThumbnailChannel = null; + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5a353453fe8..fbe2d7b38ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -292,7 +292,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } catch (IOException ioex) { logger.warning("Failed to save generated file " + generated.toString()); - } + //Shouldn't mark this file as having a preview after this. + dataFile.setPreviewImageAvailable(false); + } } // ... but we definitely want to delete it: diff --git a/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql new file mode 100644 index 00000000000..9b12d27db91 --- /dev/null +++ b/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql @@ -0,0 +1 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewshavefailed BOOLEAN DEFAULT FALSE; \ No newline at end of file From 0fea5ccca11b2348429ddfee75e4bafc709c7473 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 May 2023 13:25:38 -0400 Subject: [PATCH 006/414] refactor error --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index eb08646454d..254c334d655 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -126,7 +126,7 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (!thumbnailGenerated) { logger.fine("No thumbnail generated for " + file.getId()); - file.setPreviewGenerationHasPreviouslyFailed(true); + file.setPreviewsHaveFailed(true); } return thumbnailGenerated; } @@ -459,7 +459,7 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (!generated) { // Record failure logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - file.setPreviewGenerationHasPreviouslyFailed(true); + file.setPreviewsHaveFailed(true); } else { // Success - try to open again: try { From 8f5350ae0df4df60c55ff770259531935cb6ac9b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 10:32:21 -0400 Subject: [PATCH 007/414] 
cache isThumb available --- .../iq/dataverse/ThumbnailServiceWrapper.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 6c8db8c124b..e2bb21c8a4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -49,6 +49,7 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private Map dvobjectThumbnailsMap = new HashMap<>(); private Map dvobjectViewMap = new HashMap<>(); + private Map hasThumbMap = new HashMap<>(); private String getAssignedDatasetImage(Dataset dataset, int size) { if (dataset == null) { @@ -133,7 +134,7 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { if ((!((DataFile)result.getEntity()).isRestricted() || permissionsWrapper.hasDownloadFilePermission(result.getEntity())) - && dataFileService.isThumbnailAvailable((DataFile) result.getEntity())) { + && isThumbnailAvailable((DataFile) result.getEntity())) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( (DataFile) result.getEntity(), @@ -159,6 +160,13 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { return null; } + public boolean isThumbnailAvailable(DataFile entity) { + if(!hasThumbMap.containsKey(entity.getId())) { + hasThumbMap.put(entity.getId(), dataFileService.isThumbnailAvailable(entity)); + } + return hasThumbMap.get(entity.getId()); + } + // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { @@ -295,7 +303,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo } } - if (dataFileService.isThumbnailAvailable(thumbnailImageFile)) { + if (isThumbnailAvailable(thumbnailImageFile)) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( thumbnailImageFile, size); @@ -323,6 +331,7 @@ public String getDataverseCardImageAsBase64Url(SolrSearchResult result) { public void resetObjectMaps() { dvobjectThumbnailsMap = new HashMap<>(); dvobjectViewMap = new HashMap<>(); + hasThumbMap = new HashMap<>(); } From 8604eef7f470eade8dbf885ed42bc47407db74ff Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 13:22:18 -0400 Subject: [PATCH 008/414] set thumb fail column --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 5 ++++- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 196f84b6877..a5822828682 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1127,7 +1127,7 @@ public boolean isThumbnailAvailable (DataFile file) { } // If thumbnails are not even supported for this class of files, - // there's notthing to talk about: + // there's nothing to talk about: if (!FileUtil.isThumbnailSupported(file)) { return false; } @@ -1149,6 +1149,9 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; + } else { + file.setPreviewsHaveFailed(true); + this.save(file); } return false; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 254c334d655..ab9294eea72 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -115,7 +115,7 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s return true; } - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + "to generate thumbnail, file id: " + file.getId()); + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { boolean thumbnailGenerated = false; From aeae8f4ddbb05794c177e9b1d33725e1ed7d7e2f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 13:50:49 -0400 Subject: [PATCH 009/414] use thumb wrapper in edit and view files --- src/main/webapp/editFilesFragment.xhtml | 4 ++-- src/main/webapp/file-info-fragment.xhtml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index a4e635b8c14..af06b44e3bc 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -360,13 +360,13 @@
- - + #{fileMetadata.label} diff --git a/src/main/webapp/file-info-fragment.xhtml b/src/main/webapp/file-info-fragment.xhtml index 33a8d2c3ca5..3e8e80d51e7 100644 --- a/src/main/webapp/file-info-fragment.xhtml +++ b/src/main/webapp/file-info-fragment.xhtml @@ -28,8 +28,8 @@
- - + From c4ad20bc4b67b93908e60b76a251240f4a6e2540 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 13:49:35 -0400 Subject: [PATCH 010/414] add api --- .../edu/harvard/iq/dataverse/api/Admin.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d219339add9..14c556e9caa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2321,4 +2321,26 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); } + @DELETE + @Path("/clearThumbnailFailureFlag") + public Response clearThumbnailFailureFlag() { + em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE").executeUpdate(); + return ok("Thumbnail Failure Flags cleared."); + } + + @DELETE + @Path("/clearThumbnailFailureFlag/{id}") + public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { + try { + DataFile df = findDataFileOrDie(fileId); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); + deleteQuery.setParameter(1, df.getId()); + deleteQuery.executeUpdate(); + return ok("Thumbnail Failure Flag cleared for file id: " + df.getId() + "."); + } catch (WrappedResponse r) { + logger.info("Could not find file with the id: " + fileId); + return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); + } + } + } From 63e98b3b60a4baae98f1f88a282b97694929c443 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:16:47 -0400 Subject: [PATCH 011/414] make clearer --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index a5822828682..f41565c9449 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1149,11 +1149,9 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; - } else { - file.setPreviewsHaveFailed(true); - this.save(file); } - + file.setPreviewsHaveFailed(true); + this.save(file); return false; } From 2671cb75effb5425d02b3e874c7525b7833dc533 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:25:58 -0400 Subject: [PATCH 012/414] update comment --- src/main/java/edu/harvard/iq/dataverse/DvObject.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 6cb3816e3f1..87619450133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -182,8 +182,11 @@ public void setPreviewImageAvailable(boolean status) { this.previewImageAvailable = status; } - /** Indicates whether a previous attempt to generate a preview image has failed, - * regardless of size. 
This could be due to the file not being accessible, or a + * real failure in generating the thumbnail. In both cases, we won't want to try + * again every time the preview/thumbnail is requested for a view. */ private boolean previewsHaveFailed; From 19db99b1427700c9cc4ad462c0edd017e6dd5799 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:26:28 -0400 Subject: [PATCH 013/414] remove setting flag where datafile is not clearly being saved to db --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index ab9294eea72..921faba7989 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -126,7 +126,6 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (!thumbnailGenerated) { logger.fine("No thumbnail generated for " + file.getId()); - file.setPreviewsHaveFailed(true); } return thumbnailGenerated; } @@ -459,7 +458,6 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (!generated) { // Record failure logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - file.setPreviewsHaveFailed(true); } else { // Success - try to open again: try { From 156d025970eeb5223b6fd8343db09cafee057fed Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 1 Jun 2023 15:09:25 -0400 Subject: [PATCH 014/414] fix non-merge-able error when recording thumb fail --- .../iq/dataverse/DataFileServiceBean.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index f41565c9449..880b2ea7dc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1142,17 +1142,17 @@ public boolean isThumbnailAvailable (DataFile file) { is more important... 
*/ - - if (ImageThumbConverter.isThumbnailAvailable(file)) { - file = this.find(file.getId()); - file.setPreviewImageAvailable(true); - this.save(file); - return true; - } - file.setPreviewsHaveFailed(true); - this.save(file); - return false; + file = this.find(file.getId()); + if (ImageThumbConverter.isThumbnailAvailable(file)) { + file.setPreviewImageAvailable(true); + this.save(file); + return true; + } else { + file.setPreviewsHaveFailed(true); + this.save(file); + return false; + } } From 97aa46cb3e9bd2d424961e68e9d024216740c57f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 13 Jun 2023 16:50:38 -0400 Subject: [PATCH 015/414] rename script --- ...humb-failures.sql => V5.13.0.2__9506-track-thumb-failures.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.1__9506-track-thumb-failures.sql => V5.13.0.2__9506-track-thumb-failures.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V5.13.0.2__9506-track-thumb-failures.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql rename to src/main/resources/db/migration/V5.13.0.2__9506-track-thumb-failures.sql From dbc36c9d938571a5b61156611c445d266fbafe76 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 13 Jun 2023 17:06:19 -0400 Subject: [PATCH 016/414] refactor - remove duplicate code --- .../dataaccess/ImageThumbConverter.java | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 921faba7989..fb0785ffd7b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -114,7 +114,11 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s logger.fine("Found cached thumbnail for " + file.getId()); return true; } + return generateThumbnail(storageIO, size); + } + + private static boolean generateThumbnail(StorageIO storageIO, int size) { logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { @@ -131,7 +135,6 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } return false; - } // Note that this method works on ALL file types for which thumbnail @@ -446,25 +449,11 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); // try to generate, if not available and hasn't failed before - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + "to generate base64 thumbnail, file id: " + file.getId()); - if (!file.isPreviewsHaveFailed()) { - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } - - if (!generated) { - // Record failure - logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - } else { - // Success - try to open again: - try { - cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); - } catch (Exception ioEx) { - cachedThumbnailChannel = null; - } + if(generateThumbnail(storageIO, size)) { + try { + cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); + } catch (Exception ioEx) { + cachedThumbnailChannel = null; } } From 0c8972304a43c25ed1de1c5cc6cc1c09ef419948 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 10:30:05 -0400 Subject: [PATCH 017/414] try ds logos as url requests --- .../iq/dataverse/ThumbnailServiceWrapper.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index e2bb21c8a4c..66f79472178 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; @@ -12,7 +13,8 @@ import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.util.FileUtil; -import java.io.File; +import edu.harvard.iq.dataverse.util.SystemConfig; + import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -21,6 +23,8 @@ import java.util.Base64; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; + import javax.ejb.EJB; import javax.enterprise.context.RequestScoped; import javax.faces.view.ViewScoped; @@ -36,6 +40,9 @@ @RequestScoped @Named public class ThumbnailServiceWrapper implements java.io.Serializable { + + private static final Logger logger = Logger.getLogger(ThumbnailServiceWrapper.class.getCanonicalName()); + @Inject PermissionsWrapper permissionsWrapper; @EJB @@ -214,7 +221,13 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } + + String url = SystemConfig.getDataverseSiteUrlStatic() + "/datasets/" + dataset.getId() + "/logo"; + logger.fine("getDatasetCardImageAsBase64Url: " + url); + this.dvobjectThumbnailsMap.put(datasetId,url); + return url; +/* String cardImageUrl = null; StorageIO dataAccess = null; @@ -320,6 +333,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); return cardImageUrl; + */ } // it's the responsibility of the user - to make sure the search result From dc4b6ae5201af228b1b484c6dd430713f8728ccc Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 
17:19:41 -0400 Subject: [PATCH 018/414] set the datasetid for search cards --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 66f79472178..4c3778527d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -194,6 +194,7 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { return null; } Dataset dataset = (Dataset)result.getEntity(); + dataset.setId(result.getEntityId()); Long versionId = result.getDatasetVersionId(); From 546cfdf2048158320e76a9345e9ebc3caf7ca6c2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 1 Jun 2023 15:09:25 -0400 Subject: [PATCH 019/414] fix non-merge-able error when recording thumb fail --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 880b2ea7dc4..ec12480d28d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1148,11 +1148,10 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; - } else { - file.setPreviewsHaveFailed(true); - this.save(file); - return false; } + file.setPreviewsHaveFailed(true); + this.save(file); + return false; } From d3a48dffdfaa56bba065b3c36a2b6469e4227c33 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 17:44:02 -0400 Subject: [PATCH 020/414] typo --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 4c3778527d7..8dda91fd6a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -223,7 +223,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo return null; } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/datasets/" + dataset.getId() + "/logo"; + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; From f505428f12a5ead774642837bdb871deda34ee27 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 13:13:01 -0400 Subject: [PATCH 021/414] only send url if thumb should exist --- .../iq/dataverse/ThumbnailServiceWrapper.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 8dda91fd6a3..19c53ffa77e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import 
edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; @@ -222,6 +223,20 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + thumbnailFile = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo available that could be selected automatically."); + return null; + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); From 2d177a60fe67df26bafad35cf237e048a21545ee Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 15:08:15 -0400 Subject: [PATCH 022/414] use inputStream.transferTo --- .../dataaccess/ImageThumbConverter.java | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index fb0785ffd7b..bd87c5541a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -223,30 +223,32 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s } if (tempFilesRequired) { - ReadableByteChannel pdfFileChannel; - + //ReadableByteChannel pdfFileChannel; + InputStream inputStream = null; try { storageIO.open(); - //inputStream = storageIO.getInputStream(); - pdfFileChannel = storageIO.getReadChannel(); + inputStream = storageIO.getInputStream(); + //pdfFileChannel = storageIO.getReadChannel(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; } File tempFile; - FileChannel tempFileChannel = null; + OutputStream outputStream = null; + //FileChannel tempFileChannel = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); - tempFileChannel = new FileOutputStream(tempFile).getChannel(); + outputStream = new FileOutputStream(tempFile); + inputStream.transferTo(outputStream); - tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; } finally { - IOUtils.closeQuietly(tempFileChannel); - IOUtils.closeQuietly(pdfFileChannel); + IOUtils.closeQuietly(inputStream); + IOUtils.closeQuietly(outputStream); } sourcePdfFile = tempFile; } From 6540b5da0966addffa3a0a6a9d7e67735f89e237 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 15:42:29 -0400 Subject: [PATCH 023/414] add debug --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index bd87c5541a5..4a2b8ea0e6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -240,7 +240,8 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); - inputStream.transferTo(outputStream); + long sz = inputStream.transferTo(outputStream); + logger.info(" wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { @@ -763,7 +764,7 @@ public static String generatePDFThumbnailFromFile(String fileLocation, int size) try { fileSize = new File(fileLocation).length(); } catch (Exception ex) { - // + logger.warning("Can't open file: " + fileLocation); } if (fileSize == 0 || fileSize > sizeLimit) { From e202d0abc7395fe85218745510b32ade9b6ca770 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 16:15:58 -0400 Subject: [PATCH 024/414] more debug --- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 4a2b8ea0e6d..3033269f3bc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -196,6 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { + logger.info("Couldn't find IM"); return false; } @@ -218,12 +219,15 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFilesRequired = true; } catch (IOException ioex) { + logger.warning(ioex.getMessage()); + ioex.printStackTrace(); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { //ReadableByteChannel pdfFileChannel; + logger.info("Creating temp file"); InputStream inputStream = null; try { storageIO.open(); @@ -241,7 +245,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); long sz = inputStream.transferTo(outputStream); - logger.info(" wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); + logger.info("Wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { From b9cd2bbf0c42fb4e7aada29d7cea817c195ca75d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:22:05 -0400 Subject: [PATCH 025/414] include failed preview flag in queries --- .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 439e4b17ed4..0bd0a01aef1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -762,7 +762,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewshavefailed = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype LIKE 'image/%' " @@ -796,7 +796,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewshavefailed = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype = 'application/pdf' " From ac5a9564848ba241a993e8e9252641820e9041b4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:22:59 -0400 Subject: [PATCH 026/414] use getThumbnailByVersionId --- .../iq/dataverse/ThumbnailServiceWrapper.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 19c53ffa77e..ff5e510e82c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -226,23 +226,20 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo DataFile thumbnailFile = dataset.getThumbnailFile(); if (thumbnailFile == null) { - thumbnailFile = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); - if (thumbnailFile == null) { - logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo available that could be selected automatically."); - return null; - } - } - if (thumbnailFile.isRestricted()) { - logger.fine("Dataset (id :" + dataset.getId() + ") has a logo the user selected but the file must have later been restricted. 
Returning null."); - return null; + + // We attempt to auto-select via the optimized, native query-based method + // from the DatasetVersionService: + if (datasetVersionService.getThumbnailByVersionId(versionId) == null) { + return null; + } } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; + /* String cardImageUrl = null; StorageIO dataAccess = null; From 98acd6b50af770779329de1201663d8599edf16a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:49:24 -0400 Subject: [PATCH 027/414] cleanup --- .../dataverse/dataaccess/ImageThumbConverter.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 3033269f3bc..458b8da227b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -196,7 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { - logger.info("Couldn't find IM"); + logger.fine("Couldn't find ImageMagick"); return false; } @@ -220,19 +220,15 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s } catch (IOException ioex) { logger.warning(ioex.getMessage()); - ioex.printStackTrace(); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { - //ReadableByteChannel pdfFileChannel; - logger.info("Creating temp file"); InputStream inputStream = null; try { storageIO.open(); inputStream = storageIO.getInputStream(); - //pdfFileChannel = storageIO.getReadChannel(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; @@ -240,14 +236,11 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s File tempFile; OutputStream outputStream = null; - //FileChannel tempFileChannel = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); - long sz = inputStream.transferTo(outputStream); - logger.info("Wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); - - //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + //Reads/transfers all bytes from the input stream to the output stream. 
+ inputStream.transferTo(outputStream); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; From 610c65dc9ddd403041ee95475810db2977e57623 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 12:56:13 -0400 Subject: [PATCH 028/414] rename and cleanup --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../iq/dataverse/DataverseServiceBean.java | 45 ------- .../iq/dataverse/ThumbnailServiceWrapper.java | 117 +----------------- .../search/SearchIncludeFragment.java | 2 +- 4 files changed, 6 insertions(+), 160 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 3d608153ba3..2ca1fb825f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -483,7 +483,7 @@ public String getThumbnailString() { thumbnailString = datasetThumbnail.getBase64image(); } else { - thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(dataset, + thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsUrl(dataset, workingVersion.getId(), !workingVersion.isDraft(), ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index e092f209acd..e99458fbc9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -346,51 +346,6 @@ public String getDataverseLogoThumbnailAsBase64ById(Long dvId) { } return null; } - - /* - public boolean isDataverseLogoThumbnailAvailable(Dataverse dataverse, User user) { - if (dataverse == null) { - return false; - } - - // First, check if the dataverse has a defined logo: - - //if (dataverse.getDataverseTheme() != null && dataverse.getDataverseTheme().getLogo() != null && !dataverse.getDataverseTheme().getLogo().equals("")) { - File dataverseLogoFile = getLogo(dataverse); - if (dataverseLogoFile != null) { - String logoThumbNailPath = null; - - if (dataverseLogoFile.exists()) { - logoThumbNailPath = ImageThumbConverter.generateImageThumbnailFromFile(dataverseLogoFile.getAbsolutePath(), 48); - if (logoThumbNailPath != null) { - return true; - } - } - } - //} - */ - // If there's no uploaded logo for this dataverse, go through its - // [released] datasets and see if any of them have card images: - // - // TODO: - // Discuss/Decide if we really want to do this - i.e., go through every - // file in every dataset below... - // -- L.A. 
4.0 beta14 - /* - for (Dataset dataset : datasetService.findPublishedByOwnerId(dataverse.getId())) { - if (dataset != null) { - DatasetVersion releasedVersion = dataset.getReleasedVersion(); - - if (releasedVersion != null) { - if (datasetService.isDatasetCardImageAvailable(releasedVersion, user)) { - return true; - } - } - } - } */ - /* - return false; - } */ private File getLogo(Dataverse dataverse) { if (dataverse.getId() == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index ff5e510e82c..c75c29ea094 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -177,7 +177,7 @@ public boolean isThumbnailAvailable(DataFile entity) { // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! - public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { + public String getDatasetCardImageAsUrl(SolrSearchResult result) { // Before we do anything else, check if it's a harvested dataset; // no need to check anything else if so (harvested datasets never have // thumbnails) @@ -199,10 +199,10 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { Long versionId = result.getDatasetVersionId(); - return getDatasetCardImageAsBase64Url(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + return getDatasetCardImageAsUrl(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); } - public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, boolean autoselect, int size) { + public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean autoselect, int size) { Long datasetId = dataset.getId(); if (datasetId != null) { if (this.dvobjectThumbnailsMap.containsKey(datasetId)) { @@ -235,118 +235,9 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo } String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; - logger.fine("getDatasetCardImageAsBase64Url: " + url); + logger.fine("getDatasetCardImageAsUrl: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; - - -/* - String cardImageUrl = null; - StorageIO dataAccess = null; - - try{ - dataAccess = DataAccess.getStorageIO(dataset); - } - catch(IOException ioex){ - // ignore - } - - InputStream in = null; - // See if the dataset already has a dedicated thumbnail ("logo") saved as - // an auxilary file on the dataset level: - // (don't bother checking if it exists; just try to open the input stream) - try { - in = dataAccess.getAuxFileAsInputStream(datasetLogoThumbnail + ".thumb" + size); - //thumb48addedByImageThumbConverter); - } catch (Exception ioex) { - //ignore - } - - if (in != null) { - try { - byte[] bytes = IOUtils.toByteArray(in); - String base64image = Base64.getEncoder().encodeToString(bytes); - cardImageUrl = FileUtil.DATA_URI_SCHEME + base64image; - this.dvobjectThumbnailsMap.put(datasetId, cardImageUrl); - return cardImageUrl; - } catch (IOException ex) { - this.dvobjectThumbnailsMap.put(datasetId, ""); - return null; - // (alternatively, we could ignore the exception, and proceed with the - // regular process of selecting the thumbnail from the available - // image files - ?) 
- } finally - { - IOUtils.closeQuietly(in); - } - } - - // If not, see if the dataset has one of its image files already assigned - // to be the designated thumbnail: - cardImageUrl = this.getAssignedDatasetImage(dataset, size); - - if (cardImageUrl != null) { - //logger.info("dataset id " + result.getEntity().getId() + " has a dedicated image assigned; returning " + cardImageUrl); - return cardImageUrl; - } - - // And finally, try to auto-select the thumbnail (unless instructed not to): - - if (!autoselect) { - return null; - } - - // We attempt to auto-select via the optimized, native query-based method - // from the DatasetVersionService: - Long thumbnailImageFileId = datasetVersionService.getThumbnailByVersionId(versionId); - - if (thumbnailImageFileId != null) { - //cardImageUrl = FILE_CARD_IMAGE_URL + thumbnailImageFileId; - if (this.dvobjectThumbnailsMap.containsKey(thumbnailImageFileId)) { - // Yes, return previous answer - //logger.info("using cached result for ... "+datasetId); - if (!"".equals(this.dvobjectThumbnailsMap.get(thumbnailImageFileId))) { - return this.dvobjectThumbnailsMap.get(thumbnailImageFileId); - } - return null; - } - - DataFile thumbnailImageFile = null; - - if (dvobjectViewMap.containsKey(thumbnailImageFileId) - && dvobjectViewMap.get(thumbnailImageFileId).isInstanceofDataFile()) { - thumbnailImageFile = (DataFile) dvobjectViewMap.get(thumbnailImageFileId); - } else { - thumbnailImageFile = dataFileService.findCheapAndEasy(thumbnailImageFileId); - if (thumbnailImageFile != null) { - // TODO: - // do we need this file on the map? - it may not even produce - // a thumbnail! - dvobjectViewMap.put(thumbnailImageFileId, thumbnailImageFile); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - return null; - } - } - - if (isThumbnailAvailable(thumbnailImageFile)) { - cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( - thumbnailImageFile, - size); - //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - } - - if (cardImageUrl != null) { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, cardImageUrl); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - } - } - - //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); - - return cardImageUrl; - */ } // it's the responsibility of the user - to make sure the search result diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index bfe397cf48c..99fe4cd979b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1302,7 +1302,7 @@ public void setDisplayCardValues() { result.setImageUrl(thumbnailServiceWrapper.getDataverseCardImageAsBase64Url(result)); } else if (result.getType().equals("datasets")) { if (result.getEntity() != null) { - result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(result)); + result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsUrl(result)); } if (result.isHarvested()) { From 391504de43d8992e4b97d506fdfc763e512a8fc4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 13:46:35 -0400 Subject: [PATCH 029/414] api docs --- doc/sphinx-guides/source/api/native-api.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index b39cf91337a..24f6c0d4ced 100644 
--- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -4649,3 +4649,23 @@ A curl example using an ``ID`` curl -X POST -H 'Content-Type:application/json' -d "$JSON" $SERVER_URL/api/admin/feedback Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. + +.. _thumbnail_reset: + +Reset Thumbnail Failure Flags +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If Dataverse attempts to create a thumbnail image for an image or PDF file and the attempt fails, it will set a flag for the file to avoid repeated attempts to generate the thumbnail. +For cases where the problem may have been temporary (or fixed in a later Dataverse release), two API calls exist to reset this flag, either for all files or for a single file. + +Curl examples + +.. code-block:: bash + + export SERVER_URL=http://localhost + export fileID=1234 + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag/$fileID + From de7963a0635646f6c00e1362fc87152029394839 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 13:53:30 -0400 Subject: [PATCH 030/414] refactor typo --- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 458b8da227b..febf659b71a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -114,11 +114,11 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s logger.fine("Found cached thumbnail for " + file.getId()); return true; } - return generateThumbnail(storageIO, size); + return generateThumbnail(file, storageIO, size); } - private static boolean generateThumbnail(StorageIO storageIO, int size) { + private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { @@ -449,7 +449,7 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); // try to generate, if not available and hasn't failed before - if(generateThumbnail(storageIO, size)) { + if(generateThumbnail(file, storageIO, size)) { try { cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); } catch (Exception ioEx) { From 36d26d4b0ef9185869a006d78ca3be371dc19112 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 2 Aug 2023 11:52:39 -0400 Subject: [PATCH 031/414] update test cred --- .../harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 0d7c5458e14..081c5a622aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -691,7 +691,7 @@ public static void main(String[] args) { } System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOlprRmxGejNTWDlkTVpUNk92ZmVJaFQyTWY0SDd4cXBoTDNSS29vUmRGVlE9"); + System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOmtsa1RZc242bU1oRXNuUFFwQy9oSzQxSi9EMDV6SjRtUDd1c0ZiN011MEk9"); System.setProperty("dataverse.files.globus.base-store","file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); From 4b755b50bfbe729570dde943c1809ef80b3b840f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 18 Aug 2023 17:25:52 -0400 Subject: [PATCH 032/414] setting is GlobusAppUrl not ...URL --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8493702406b..a5579c82c6d 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3926,7 +3926,7 @@ GlobusEndpoint is Globus endpoint id used with Globus integration. See :ref:`glo A comma-separated list of the S3 stores that are configured to support Globus integration. See :ref:`globus-support` for details. -:GlobusAppURL +:GlobusAppUrl +++++++++++++ The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. From 4e6d948d712da42862b9f429d8ef65086a71baab Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 25 Aug 2023 10:00:30 -0400 Subject: [PATCH 033/414] remove req. 
that app and DV are on same host, note future todo --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 11 +++-------- .../iq/dataverse/globus/GlobusServiceBean.java | 2 ++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index dcd7eacf50b..b8165f0314f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3329,8 +3329,7 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo, - @Context HttpHeaders headers + @Context UriInfo uriInfo ) throws IOException, ExecutionException, InterruptedException { logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); @@ -3390,12 +3389,8 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } - String requestUrl = headers.getRequestHeader("origin").get(0); - - if(requestUrl.contains("localhost")){ - requestUrl = "http://localhost:8080"; - } - + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + // Async Call globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index c2137dd1f47..5c387710844 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -737,6 +737,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); + //ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of calling API + String output = addFilesAsync(command, globusLogger); if (output.equalsIgnoreCase("ok")) { // if(!taskSkippedFiles) From b5e47b98a08f25c1160fc651b84bc1fbefe3dfa4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 29 Aug 2023 12:52:33 -0400 Subject: [PATCH 034/414] fix retrieveSize parsing, refactoring --- .../dataaccess/GlobusOverlayAccessIO.java | 169 ++++++------------ .../dataaccess/RemoteOverlayAccessIO.java | 82 ++++----- 2 files changed, 93 insertions(+), 158 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 081c5a622aa..6a22f8b68f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -4,14 +4,12 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -56,7 +54,6 @@ /** * 
@author qqmyers - * @param what it stores */ /* * Globus Overlay Driver @@ -64,14 +61,13 @@ * StorageIdentifier format: :///// */ -public class GlobusOverlayAccessIO extends StorageIO { +public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); private StorageIO baseStore = null; private String path = null; private String endpointWithBasePath = null; - private String globusToken = null; private static HttpClientContext localContext = HttpClientContext.create(); private PoolingHttpClientConnectionManager cm = null; @@ -117,103 +113,37 @@ private void validatePath(String relPath) throws IOException { } } - @Override - public void open(DataAccessOption... options) throws IOException { - - baseStore.open(options); - - DataAccessRequest req = this.getRequest(); - - if (isWriteAccessRequested(options)) { - isWriteAccess = true; - isReadAccess = false; - } else { - isWriteAccess = false; - isReadAccess = true; - } - - if (dvObject instanceof DataFile) { - String storageIdentifier = dvObject.getStorageIdentifier(); - - DataFile dataFile = this.getDataFile(); - - if (req != null && req.getParameter("noVarHeader") != null) { - baseStore.setNoVarHeader(true); - } - - if (storageIdentifier == null || "".equals(storageIdentifier)) { - throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); - } - - // Fix new DataFiles: DataFiles that have not yet been saved may use this method - // when they don't have their storageidentifier in the final form - // So we fix it up here. ToDo: refactor so that storageidentifier is generated - // by the appropriate StorageIO class and is final from the start. - logger.fine("StorageIdentifier is: " + storageIdentifier); - - if (isReadAccess) { - if (dataFile.getFilesize() >= 0) { - this.setSize(dataFile.getFilesize()); - } else { - logger.fine("Setting size"); - this.setSize(getSizeFromGlobus()); - } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { - - List datavariables = dataFile.getDataTable().getDataVariables(); - String varHeaderLine = generateVariableHeader(datavariables); - this.setVarHeader(varHeaderLine); - } - - } - - this.setMimeType(dataFile.getContentType()); - - try { - this.setFileName(dataFile.getFileMetadata().getLabel()); - } catch (Exception ex) { - this.setFileName("unknown"); - } - } else if (dvObject instanceof Dataset) { - throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else if (dvObject instanceof Dataverse) { - throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromGlobus()); - } - } - // Call the Globus API to get the file size - private long getSizeFromGlobus() { + @Override + long retrieveSize() { // Construct Globus URL URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; int pathStart = endpointWithBasePath.indexOf("/"); -logger.info("endpointWithBasePath: " + endpointWithBasePath); + logger.info("endpointWithBasePath: " + endpointWithBasePath); String directoryPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart) : "") + path.substring(0, filenameStart); logger.info("directoryPath: " + directoryPath); String filename = path.substring(filenameStart); String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; - absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + + "/ls?path=" + directoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - + logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { - //Get reponse as string + // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); - return (long) responseJson.getInt("size"); + return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); } else { - logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); + logger.warning("Response from " + get.getURI().toString() + " was " + + response.getStatusLine().getStatusCode()); logger.info(EntityUtils.toString(response.getEntity())); } } catch (URISyntaxException e) { @@ -227,24 +157,6 @@ private long getSizeFromGlobus() { e.printStackTrace(); } return -1; - - /* - * long size = -1; HttpHead head = new HttpHead(endpointWithBasePath + "/" + - * path); try { CloseableHttpResponse response = - * getSharedHttpClient().execute(head, localContext); - * - * try { int code = response.getStatusLine().getStatusCode(); - * logger.fine("Response for HEAD: " + code); switch (code) { case 200: Header[] - * headers = response.getHeaders(HTTP.CONTENT_LEN); logger.fine("Num headers: " - * + headers.length); String sizeString = - * response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); - * logger.fine("Content-Length: " + sizeString); size = - * Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); - * logger.fine("Found file size: " + size); break; default: - * logger.warning("Response from " + head.getURI().toString() + " was " + code); - * } } finally { EntityUtils.consume(response.getEntity()); } } catch - * (IOException e) { logger.warning(e.getMessage()); } return size; - */ } @Override @@ -417,7 +329,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (getSizeFromGlobus() != -1); + return (retrieveSize() != -1); } @Override @@ -485,9 +397,12 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - AccessToken accessToken = GlobusServiceBean.getClientToken(JvmSettings.GLOBUS_TOKEN.lookup(driverId)); + // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + String globusToken = System.getProperty("dataverse.files." 
+ this.driverId + ".globus-token"); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); - endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + endpointWithBasePath = System.getProperty("dataverse.files." + this.driverId + ".base-uri"); logger.info("base-uri is " + endpointWithBasePath); if (endpointWithBasePath == null) { throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); @@ -527,7 +442,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Implemented: GlobusOverlay store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); throw new IOException("Not implemented"); } @@ -554,7 +469,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Implemented: GlobusOverlay store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); throw new IOException("Not implemented"); } @@ -640,21 +555,21 @@ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementExcept @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + "GlobusOverlayAccessIO: savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + "GlobusOverlayAccessIO: saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + "GlobusOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); } @@ -689,24 +604,42 @@ public static void main(String[] args) { if (args.length > 0) { System.out.printf("List of arguments: {}", Arrays.toString(args)); } - System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); + // System.setProperty("dataverse.files.globus.globus_client_id", + // "2791b83e-b989-47c5-a7fa-ce65fd949522"); + System.setProperty("dataverse.files.globus.base-uri", "d8c42580-6528-4605-9ad8-116a61982644"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOmtsa1RZc242bU1oRXNuUFFwQy9oSzQxSi9EMDV6SjRtUDd1c0ZiN011MEk9"); - System.setProperty("dataverse.files.globus.base-store","file"); - System.setProperty("dataverse.files.file.type", - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + // 
System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); + System.setProperty("dataverse.files.globus.globus-token", + "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); + System.setProperty("dataverse.files.globus.base-store", "file"); + System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); - - - + logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); + try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.getSizeFromGlobus()); - + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( + "globus://1234///hdc1/image001.mrc", "globus"); + logger.info("Size is " + gsio.retrieveSize()); + } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } + try { + DataFile df = new DataFile(); + Dataset ds = new Dataset(); + ds.setAuthority("10.5072"); + ds.setIdentifier("FK21234"); + df.setOwner(ds); + df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); + logger.info("Size2 is " + gsio.retrieveSize()); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ee2b6779cba..710d7a38fb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -85,7 +85,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - + logger.fine("Base URL: " + path); } @@ -98,18 +98,17 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE validatePath(path); logger.fine("Base URL: " + path); } - + private void validatePath(String relPath) throws IOException { try { URI absoluteURI = new URI(baseUrl + "/" + relPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -150,7 +149,7 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(getSizeFromHttpHeader()); + this.setSize(retrieveSize()); } if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { @@ -171,16 +170,14 @@ public void open(DataAccessOption... 
options) throws IOException { } } else if (dvObject instanceof Dataset) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } else if (dvObject instanceof Dataverse) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromHttpHeader()); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } } - private long getSizeFromHttpHeader() { + long retrieveSize() { long size = -1; HttpHead head = new HttpHead(baseUrl + "/" + path); try { @@ -356,8 +353,9 @@ public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } if (this.getDvObject() instanceof Dataset) { throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); @@ -379,7 +377,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (getSizeFromHttpHeader() != -1); + return (retrieveSize() != -1); } @Override @@ -407,7 +405,7 @@ public boolean downloadRedirectEnabled() { } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -422,8 +420,7 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", - secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); @@ -464,9 +461,10 @@ private void configureStores(DataAccessRequest req, String driverId, String stor if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { if (this.getDvObject() != null) { @@ -481,8 +479,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." 
+ baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -492,12 +490,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - //remoteDriverId:// is removed if coming through directStorageIO + // remoteDriverId:// is removed if coming through directStorageIO int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { + if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - //THe base store needs the baseStoreIdentifier and not the relative URL + // THe base store needs the baseStoreIdentifier and not the relative URL fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); switch (baseDriverType) { @@ -508,8 +506,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -525,37 +523,41 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch (MalformedURLException mfue) { logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + // Convenience method to assemble the path, starting with the DOI + // authority/identifier/, that is needed to create a base store via + // DataAccess.getDirectStorageIO - the caller has to add the store type specific + // prefix required. 
private String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); } if (this.getDvObject() instanceof Dataset) { fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; } else if (this.getDvObject() instanceof DataFile) { fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } - + public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { try { @@ -617,11 +619,11 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); try { URI absoluteURI = new URI(baseUrl + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); return false; } From cec0b519948d8ba480f49f915dabd5f31e5c5082 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 29 Aug 2023 12:52:48 -0400 Subject: [PATCH 035/414] add globus type --- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index d046fa4661d..f2eb0236df4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -48,6 +48,7 @@ public DataAccess() { public static final String S3 = "s3"; static final String SWIFT = "swift"; static final String REMOTE = "remote"; + static final String GLOBUS = "globus"; static final String TMP = "tmp"; public static final String SEPARATOR = "://"; //Default to "file" is for tests only @@ -98,6 +99,8 @@ protected static StorageIO getStorageIO(T dvObject, Data return new SwiftAccessIO<>(dvObject, req, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(dvObject, req, storageDriverId); case TMP: throw new IOException( "DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); @@ -369,6 +372,8 @@ public static boolean isValidDirectStorageIdentifier(String storageId) { return S3AccessIO.isValidIdentifier(driverId, storageId); case REMOTE: return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + case GLOBUS: + return GlobusOverlayAccessIO.isValidIdentifier(driverId, storageId); default: logger.warning("Request to validate for storage driver: " + driverId); } From 555bf05af241c555300f5c528656de3d10b3c584 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 30 Aug 2023 10:07:49 -0400 Subject: [PATCH 036/414] refactoring g store as a remotestore --- .../dataaccess/GlobusOverlayAccessIO.java | 370 +++--------------- .../dataaccess/RemoteOverlayAccessIO.java | 46 +-- .../iq/dataverse/settings/JvmSettings.java | 2 +- 3 files changed, 80 insertions(+), 338 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 6a22f8b68f3..16345cd1f9c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -58,28 +58,26 @@ /* * Globus Overlay Driver * - * StorageIdentifier format: :///// + * Remote: + * StorageIdentifier format: ://// + * Storage location: / + * Internal + * StorageIdentifier format: ://// + * Storage location: /// + * + * baseUrl: globus:// + */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - private StorageIO baseStore = null; - private String path = null; 
- private String endpointWithBasePath = null; - - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; private String globusAccessToken = null; + /* + * If this is set to true, the store supports Globus transfer in and Dataverse/the globus app manage file locations, access controls, deletion, etc. + */ + private boolean isDataverseManaged = false; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); @@ -104,9 +102,9 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE private void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(endpointWithBasePath + "/" + relPath); - if (!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { - throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); + URI absoluteURI = new URI(baseUrl + "/" + relPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); @@ -120,13 +118,24 @@ long retrieveSize() { URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") - + path.substring(0, filenameStart); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart+1) : ""); logger.info("directoryPath: " + directoryPath); + + if(isDataverseManaged) { + Dataset ds = ((DataFile) dvObject).getOwner(); + directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + logger.info("directoryPath now: " + directoryPath); + + } + if(filenameStart > 0) { + directoryPath = directoryPath + path.substring(0, filenameStart); + } + logger.info("directoryPath finally: " + directoryPath); String filename = path.substring(filenameStart); - String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; + String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); @@ -138,7 +147,7 @@ long retrieveSize() { if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); + logger.info("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); } else { @@ -159,63 +168,26 @@ long retrieveSize() { return -1; } - @Override - public InputStream getInputStream() throws IOException { - if (super.getInputStream() == null) { - try { - HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); - CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); - int code = response.getStatusLine().getStatusCode(); - switch (code) { - case 200: - setInputStream(response.getEntity().getContent()); - break; - default: - logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + endpointWithBasePath + "/" + path + " code: " + code); - } - } catch (Exception e) { - logger.warning(e.getMessage()); - e.printStackTrace(); - throw new IOException("Error retrieving: " + endpointWithBasePath + "/" + path + " " + e.getMessage()); - } - setChannel(Channels.newChannel(super.getInputStream())); - } - return super.getInputStream(); - } - - @Override - public Channel getChannel() throws IOException { - if (super.getChannel() == null) { - getInputStream(); - } - return channel; - } - - @Override - public ReadableByteChannel getReadChannel() throws IOException { - // Make sure StorageIO.channel variable exists - getChannel(); - return super.getReadChannel(); - } @Override public void delete() throws IOException { + +// Fix // Delete is best-effort - we tell the remote server and it may or may not // implement this call if (!isDirectAccess()) { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(endpointWithBasePath + "/" + path); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + endpointWithBasePath + "/" + path); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -224,7 +196,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + endpointWithBasePath + "/" + path); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -233,146 +205,20 @@ public void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. - */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStorageLocation = fullStorageLocation - .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - - @Override - public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); - } - - @Override - public boolean exists() { - logger.fine("Exists called"); - return (retrieveSize() != -1); - } - @Override - public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); - } - @Override - public 
OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); - } - - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - - @Override - public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); - if ("true".equalsIgnoreCase(optionValue)) { - return true; - } - return false; - } - - public boolean downloadRedirectEnabled(String auxObjectTag) { - return baseStore.downloadRedirectEnabled(auxObjectTag); - } @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { - +//Fix // ToDo - support remote auxiliary Files if (auxiliaryTag == null) { String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); if (secretKey == null) { - return endpointWithBasePath + "/" + path; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(endpointWithBasePath + "/" + path, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { @@ -380,35 +226,21 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); + isDataverseManaged = Boolean.getBoolean("dataverse.files." + this.driverId + ".managed"); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); - endpointWithBasePath = System.getProperty("dataverse.files." + this.driverId + ".base-uri"); - logger.info("base-uri is " + endpointWithBasePath); - if (endpointWithBasePath == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); + logger.info("base-url is " + baseUrl); + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); } else { try { - new URI(endpointWithBasePath); + new URI(baseUrl); } catch (Exception e) { logger.warning( "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); @@ -442,9 +274,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: GlobusOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } else if (storageLocation != null) { @@ -469,9 +301,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: GlobusOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); @@ -488,97 +320,13 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } } - // Convenience method to assemble the path, starting with the DOI - // authority/identifier/, that is needed to create a base store via - // DataAccess.getDirectStorageIO - the caller has to add the store type specific - // prefix required. - private String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStoragePath = fullStoragePath - .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if (suffixIndex >= 0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); - } - - @Override - public void savePath(Path fileSystemPath) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: savePath() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: saveInputStream() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); - - } protected static boolean isValidIdentifier(String driverId, String storageId) { String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); - String baseUri = System.getProperty("dataverse.files." + driverId + ".base-uri"); + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); try { - URI absoluteURI = new URI(baseUri + "/" + urlPath); - if (!absoluteURI.normalize().toString().startsWith(baseUri)) { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } @@ -590,14 +338,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - public static String getBaseStoreIdFor(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".base-store"); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } public static void main(String[] args) { System.out.println("Running the main method"); @@ -606,7 +346,7 @@ public static void main(String[] args) { } // System.setProperty("dataverse.files.globus.globus_client_id", // "2791b83e-b989-47c5-a7fa-ce65fd949522"); - System.setProperty("dataverse.files.globus.base-uri", "d8c42580-6528-4605-9ad8-116a61982644"); + System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); // System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); System.setProperty("dataverse.files.globus.globus-token", @@ -614,7 +354,7 @@ public static void main(String[] args) { System.setProperty("dataverse.files.globus.base-store", "file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); - logger.info(JvmSettings.BASE_URI.lookup("globus")); + logger.info(JvmSettings.BASE_URL.lookup("globus")); logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 710d7a38fb4..6b15bcf1dc8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -52,31 +52,32 @@ /** * @author qqmyers - * @param what it stores */ /* * Remote Overlay Driver * * 
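 * For example (driver id and values hypothetical): with
 * dataverse.files.rem.base-url=https://example.org/files, a storageidentifier
 * of rem://18b39-cafe//images/file1.txt resolves to the remote object at
 * https://example.org/files/images/file1.txt, while auxiliary objects are
 * kept in the configured base store.
 *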
StorageIdentifier format: - * ://// + * ://// + * + * baseUrl: http(s):// */ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - private StorageIO baseStore = null; - private String path = null; - private String baseUrl = null; + protected StorageIO baseStore = null; + protected String path = null; + protected String baseUrl = null; - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + protected PoolingHttpClientConnectionManager cm = null; CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); @@ -86,7 +87,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - logger.fine("Base URL: " + path); + logger.fine("Relative path: " + path); } public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { @@ -96,7 +97,7 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); validatePath(path); - logger.fine("Base URL: " + path); + logger.fine("Relative path: " + path); } private void validatePath(String relPath) throws IOException { @@ -420,7 +421,8 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", + secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); @@ -483,9 +485,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } else if (storageLocation != null) { @@ -510,9 +512,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); @@ -533,7 +535,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor // authority/identifier/, that is needed to create a base store via // DataAccess.getDirectStorageIO - the caller has to add the store type specific // prefix required. - private String getStoragePath() throws IOException { + protected String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); @@ -596,21 +598,21 @@ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementExcept @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream(InputStream, Long) not implemented in this storage driver."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 451dbcc56d1..ffe08a6afb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,7 +50,7 @@ public enum JvmSettings { SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), FILES(SCOPE_FILES), - BASE_URI(FILES, "base-uri"), + BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), // SOLR INDEX SETTINGS From 270e0fd0a28b516f62dc29e927bbb19753f47d19 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Sep 2023 10:08:33 -0400 Subject: [PATCH 037/414] temporary fix for local compile issues --- .../harvest/server/web/servlet/OAIServlet.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 9cf1629abfc..3ce88fdf204 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -74,18 +74,13 @@ public class OAIServlet extends HttpServlet { @EJB SystemConfig systemConfig; - - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxidentifiers", defaultValue="100") - private Integer maxListIdentifiers; - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxsets", defaultValue="100") - private Integer maxListSets; + //Todo - revert this change - added to get past some local compile issues + private Integer 
maxListIdentifiers=100; + + private Integer maxListSets=100; - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxrecords", defaultValue="10") - private Integer maxListRecords; + private Integer maxListRecords=10; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.web.servlet.OAIServlet"); // If we are going to stick with this solution - of providing a minimalist From 1828855a162683d564e02507ce60fd99963b43d0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Sep 2023 10:09:53 -0400 Subject: [PATCH 038/414] updates/fixes re: extending RemoteOverlay, etc. --- .../iq/dataverse/dataaccess/DataAccess.java | 2 + .../dataaccess/GlobusOverlayAccessIO.java | 208 +++++++----------- .../dataaccess/RemoteOverlayAccessIO.java | 9 +- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 52 +++-- 5 files changed, 119 insertions(+), 154 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index f2eb0236df4..8387f8110cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -132,6 +132,8 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) return new SwiftAccessIO<>(storageLocation, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); throw new IOException("getDirectStorageIO: Unsupported storage method."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 16345cd1f9c..b00724e2825 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -58,46 +58,63 @@ /* * Globus Overlay Driver * - * Remote: - * StorageIdentifier format: ://// - * Storage location: / - * Internal - * StorageIdentifier format: ://// - * Storage location: /// + * Remote: StorageIdentifier format: + * ://// Storage location: + * / Internal StorageIdentifier format: + * :// Storage location: + * /// * * baseUrl: globus:// - + * */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - - private String globusAccessToken = null; + String globusAccessToken = null; /* - * If this is set to true, the store supports Globus transfer in and Dataverse/the globus app manage file locations, access controls, deletion, etc. + * If this is set to true, the store supports Globus transfer in and + * Dataverse/the globus app manage file locations, access controls, deletion, + * etc. 
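 * A configuration sketch for the managed case (property names are the ones
 * read by this class; the endpoint id and token are placeholders, the
 * endpoint id being the one used in the test main() below):
 *   -Ddataverse.files.globus.managed=true
 *   -Ddataverse.files.globus.base-url=globus://d8c42580-6528-4605-9ad8-116a61982644
 *   -Ddataverse.files.globus.globus-token=<base64 client credentials>
 *   -Ddataverse.files.globus.base-store=file
 * Note that the managed flag is read via Boolean.getBoolean(), which returns
 * true only when the named system property exists and equals "true"
 * (ignoring case).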
*/ - private boolean isDataverseManaged = false; + private boolean dataverseManaged = false; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - this.setIsLocalFile(false); - configureStores(req, driverId, null); - logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(path); + if (dvObject instanceof DataFile) { + globusAccessToken = retrieveGlobusAccessToken(); + } + dataverseManaged = isDataverseManaged(this.driverId); - logger.fine("Relative path: " + path); + logger.info("GAT3: " + globusAccessToken); } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { - super(null, null, driverId); - this.setIsLocalFile(false); - configureStores(null, driverId, storageLocation); + this.driverId = driverId; + this.dataverseManaged = isDataverseManaged(this.driverId); + if (dataverseManaged) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); + } +//ToDo - only when needed? + globusAccessToken = retrieveGlobusAccessToken(); + + } + + private String retrieveGlobusAccessToken() { + // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(path); - logger.fine("Relative path: " + path); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); } private void validatePath(String relPath) throws IOException { @@ -114,6 +131,7 @@ private void validatePath(String relPath) throws IOException { // Call the Globus API to get the file size @Override long retrieveSize() { + logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { @@ -121,16 +139,16 @@ long retrieveSize() { String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart+1) : ""); + String directoryPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); logger.info("directoryPath: " + directoryPath); - if(isDataverseManaged) { + if (dataverseManaged && (dvObject!=null)) { Dataset ds = ((DataFile) dvObject).getOwner(); directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); logger.info("directoryPath now: " + directoryPath); } - if(filenameStart > 0) { + if (filenameStart > 0) { directoryPath = directoryPath + path.substring(0, filenameStart); } logger.info("directoryPath finally: " + directoryPath); @@ -168,12 +186,15 @@ long retrieveSize() { return -1; } - - - + + @Override + public InputStream getInputStream() throws IOException { + throw new IOException("Not implemented"); + } + @Override public void delete() throws IOException { - + // Fix // Delete is best-effort - we tell the remote server and it may or may not // implement this call @@ -205,9 +226,6 @@ public void delete() throws IOException { } - - - @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { @@ -218,114 +236,37 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", - secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); } } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); - String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); - isDataverseManaged = Boolean.getBoolean("dataverse.files." + this.driverId + ".managed"); + private static boolean isDataverseManaged(String driverId) { + return Boolean.getBoolean("dataverse.files." + driverId + ".managed"); + } - AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); - globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); - // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); - baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); - logger.info("base-url is " + baseUrl); + static boolean isValidIdentifier(String driverId, String storageId) { + String baseIdentifier = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); if (baseUrl == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); - } else { - try { - new URI(baseUrl); - } catch (Exception e) { - logger.warning( - "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); - throw new IOException("Can't interpret base-url as a URI"); - } - + return false; } - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." 
+ baseDriverId + ".type", - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if (dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + // Internally managed endpoints require standard name pattern (submitted via + // /addFile(s) api) + if (isDataverseManaged(driverId)) { + boolean hasStandardName = usesStandardNamePattern(baseIdentifier); + if (hasStandardName) { + return true; } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not supported"); - } - - } else if (storageLocation != null) { - // ://// - // remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if (index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - // THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not supported"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); + logger.warning("Unacceptable identifier pattern in submitted identifier: " + baseIdentifier); + return false; } } - remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); + // Remote endpoints require a valid URI within the baseUrl try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch (MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } - - - protected static boolean isValidIdentifier(String driverId, String storageId) { - String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); - String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); - try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); + URI absoluteURI = new URI(baseUrl + "/" + baseIdentifier); if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; @@ -338,7 +279,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { @@ -347,15 +287,19 @@ public static void main(String[] args) { // System.setProperty("dataverse.files.globus.globus_client_id", // "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); - System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println("NotValid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println("ValidRemote: " + isValidIdentifier("globus", "globus://localid//of/the/hill")); + System.setProperty("dataverse.files.globus.managed", "true"); + + System.out.println("ValidLocal: " + isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); // System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); System.setProperty("dataverse.files.globus.globus-token", "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); System.setProperty("dataverse.files.globus.base-store", "file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); - logger.info(JvmSettings.BASE_URL.lookup("globus")); - logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); + // logger.info(JvmSettings.BASE_URL.lookup("globus")); + // logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); try { GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 6b15bcf1dc8..a9653f2ab68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,6 +65,8 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + String globusAccessToken = null; + protected StorageIO baseStore = null; protected String path = null; protected String baseUrl = null; @@ -79,6 +81,9 @@ public class RemoteOverlayAccessIO extends StorageIO { protected static boolean trustCerts = false; protected int httpConcurrency = 4; + public RemoteOverlayAccessIO() { + } + public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); @@ -445,7 +450,7 @@ int getUrlExpirationMinutes() { return 60; } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { baseUrl = 
System.getProperty("dataverse.files." + this.driverId + ".base-url"); if (baseUrl == null) { throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); @@ -616,7 +621,7 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } - protected static boolean isValidIdentifier(String driverId, String storageId) { + static boolean isValidIdentifier(String driverId, String storageId) { String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index bfd5c5f0d8f..333d72e09b2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -611,7 +611,7 @@ public static boolean isDirectUploadEnabled(String driverId) { //Check that storageIdentifier is consistent with store's config //False will prevent direct uploads - protected static boolean isValidIdentifier(String driverId, String storageId) { + static boolean isValidIdentifier(String driverId, String storageId) { return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5c387710844..d98e1c9b7f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -594,11 +594,10 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger.info("Starting an globusUpload "); - String datasetIdentifier = dataset.getStorageIdentifier(); - + // ToDo - use DataAccess methods? 
- String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); + //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); Thread.sleep(5000); @@ -670,18 +669,26 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin JsonArray filesJsonArray = jsonObject.getJsonArray("files"); if (filesJsonArray != null) { + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from // externalTool String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] bits = storageIdentifier.split(":"); - String bucketName = bits[1].replace("/", ""); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + //If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if(bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + //or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; String fileName = fileJsonObject.getString("fileName"); inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); @@ -690,7 +697,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - +logger.info("Size: " + newfilesJsonArray.size()); +logger.info("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -699,15 +707,21 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String storageIdentifier = fileJsonObject.getString("storageIdentifier"); String fileName = fileJsonObject.getString("fileName"); String directoryLabel = fileJsonObject.getString("directoryLabel"); - String[] bits = storageIdentifier.split(":"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + //If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if(bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; - + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + logger.info("List Size: " + newfileJsonObject.size()); + //if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { JsonPatch path = Json.createPatchBuilder() 
.add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); fileJsonObject = path.apply(fileJsonObject); @@ -716,11 +730,11 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin fileJsonObject = path.apply(fileJsonObject); jsonDataSecondAPI.add(fileJsonObject); countSuccess++; - } else { - globusLogger.info(fileName - + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing values "); + // countError++; + // } } else { globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); @@ -1045,8 +1059,8 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } catch (IOException ioex) { count = 3; logger.info(ioex.getMessage()); - globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath - + ") does not appear to be an S3 object associated with driver: "); + globusLogger.info("DataFile (fullPAth " + fullPath + + ") does not appear to be accessible withing Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); From ae16dadddd7978dae23dd62671c05433db2f9300 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 18 Sep 2023 10:13:52 -0400 Subject: [PATCH 039/414] minor cleanup --- .../iq/dataverse/globus/GlobusServiceBean.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 56219f843a7..9aae4dffc03 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -130,7 +130,7 @@ ArrayList checkPermisions(AccessToken clientTokenUser, String directory, return ids; } - +/* public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { if (directory != null && !directory.equals("")) { @@ -163,8 +163,8 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin count++; } } - - public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { +*/ + public void deletePermission(String ruleId, Logger globusLogger) throws MalformedURLException { if (ruleId.length() > 0) { AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); @@ -554,7 +554,9 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) + rawStorageId + "&fileName=" + df.getCurrentName(); } } - return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + String finalUrl = tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + logger.info("Calling app: " + finalUrl); + return finalUrl; } public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { @@ -624,7 +626,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + deletePermission(ruleId, globusLogger); } // If success, switch to an EditInProgress lock - do this before removing the @@ -897,7 +899,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro String taskStatus = 
getTaskStatus(task); if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + deletePermission(ruleId, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { From 9562b788b7dfbfec53d6d7e9aeb52e690cddddf4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 18 Sep 2023 10:14:43 -0400 Subject: [PATCH 040/414] start allowupload method, fix messaging when disabled --- .../harvard/iq/dataverse/api/Datasets.java | 63 ++++++++++++++++++- src/main/java/propertyFiles/Bundle.properties | 3 + 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 599890913fd..a999a71b2d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3393,6 +3393,65 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } + /** Requests permissions for a given globus user to upload to the dataset + * + * @param crc + * @param datasetId + * @param jsonData + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ + @POST + @AuthRequired + @Path("{id}/allowGlobusUpload") + @Consumes(MediaType.APPLICATION_JSON) + public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody + ) throws IOException, ExecutionException, InterruptedException { + + + logger.info(" ==== (api allowGlobusUpload) jsonBody ====== " + jsonBody); + + + if (!systemConfig.isGlobusUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + } + + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + User authUser; + authUser = getRequestUser(crc); + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // Async Call + globusService.givePermission(jsonBody, jsonBody, jsonBody, null, datasetId, jsonBody).globusDownload(jsonData, dataset, authUser); + + return ok("Async call to Globus Download started"); + + } + + /** Monitors a globus download and removes permissions on the dir/dataset when done + * + * @param crc + * @param datasetId + * @param jsonData + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ @POST @AuthRequired @Path("{id}/deleteglobusRule") @@ -3404,8 +3463,8 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData); - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + if (!systemConfig.isGlobusDownload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); } // ------------------------------------- diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 997f0470cc3..0343e109e61 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2623,6 +2623,9 @@ 
datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymi datasets.api.creationdate=Date Created datasets.api.modificationdate=Last Modified Date datasets.api.curationstatus=Curation Status +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this installation of Dataverse. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this installation of Dataverse. + #Dataverses.java From c6197b3bf23ad1dccb023ea668799e7a79805d93 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Mon, 18 Sep 2023 10:40:05 -0400 Subject: [PATCH 041/414] #9920 support Postgres 16 --- pom.xml | 4 ++-- scripts/installer/install.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 7ba22d2a076..c5b7fc302f3 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ war 1.2.18.4 - 9.21.2 + 9.22.1 1.20.1 0.8.7 5.2.1 @@ -790,7 +790,7 @@ true docker-build - 13 + 16 gdcc/dataverse:${app.image.tag} unstable diff --git a/scripts/installer/install.py b/scripts/installer/install.py index 5a7b9f75696..18995695638 100644 --- a/scripts/installer/install.py +++ b/scripts/installer/install.py @@ -422,9 +422,13 @@ conn.close() if int(pg_major_version) >= 15: + admin_conn_string = "dbname='"+pgDb+"' user='postgres' password='"+pgAdminPassword+"' host='"+pgHost+"'" + conn = psycopg2.connect(admin_conn_string) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = conn.cursor() conn_cmd = "GRANT CREATE ON SCHEMA public TO "+pgUser+";" - print("PostgreSQL 15 or higher detected. Running " + conn_cmd) try: + print("PostgreSQL 15 or higher detected. Running " + conn_cmd) cur.execute(conn_cmd) except: if force: From 116845c753a8364d14bad2edafcebf6a0e28dde6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 19 Sep 2023 15:09:11 -0400 Subject: [PATCH 042/414] refactoring, add allowUpload api call --- .../harvard/iq/dataverse/api/Datasets.java | 7 +- .../dataaccess/GlobusOverlayAccessIO.java | 2 +- .../iq/dataverse/globus/GlobusEndpoint.java | 31 ++++++ .../dataverse/globus/GlobusServiceBean.java | 104 ++++++++++++------ 4 files changed, 109 insertions(+), 35 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index a999a71b2d4..745f294fee6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3434,11 +3434,14 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject params = JsonUtil.getJsonObject(jsonBody); + String principal = params.getString("principal"); // Async Call - globusService.givePermission(jsonBody, jsonBody, jsonBody, null, datasetId, jsonBody).globusDownload(jsonData, dataset, authUser); + globusService.givePermission("identity", principal, "rw", dataset); - return ok("Async call to Globus Download started"); + return ok("Permission Granted"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index b18e6bb7e76..965dc3c0947 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -214,7 +214,7 
@@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - private static boolean isDataverseManaged(String driverId) { + public static boolean isDataverseManaged(String driverId) { return Boolean.getBoolean("dataverse.files." + driverId + ".managed"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java new file mode 100644 index 00000000000..d1e5d19a592 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -0,0 +1,31 @@ +package edu.harvard.iq.dataverse.globus; + +public class GlobusEndpoint { + + private String id; + private String clientToken; + private String basePath; + + + public GlobusEndpoint(String id, String clientToken, String basePath) { + + } + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } + public String getClientToken() { + return clientToken; + } + public void setClientToken(String clientToken) { + this.clientToken = clientToken; + } + public String getBasePath() { + return basePath; + } + public void setBasePath(String basePath) { + this.basePath = basePath; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9aae4dffc03..910ee796e0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -46,6 +46,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -106,23 +107,23 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - ArrayList checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint, - String principalType, String principal) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); + private ArrayList checkPermissions(GlobusEndpoint endpoint, String principalType, String principal) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + endpoint.getClientToken(), "GET", null); ArrayList ids = new ArrayList(); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); for (int i = 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); - if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory)) + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { ids.add(pr.getId()); } else { - logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); + logger.info(pr.getPath() + " === " + 
endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } @@ -185,24 +186,24 @@ public void deletePermission(String ruleId, Logger globusLogger) throws Malforme } - public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, - String directory, String globusEndpoint) throws MalformedURLException { + public int givePermission(String principalType, String principal, String perm, Dataset dataset) throws MalformedURLException { - ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + ArrayList rules = checkPermissions(endpoint, principalType, principal); Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); - permissions.setPath(directory + "/"); + permissions.setPath(endpoint.getBasePath() + "/"); permissions.setPermissions(perm); Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; if (rules.size() == 0) { logger.info("Start creating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access"); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); if (result.status == 400) { @@ -214,9 +215,9 @@ public int givePermission(String principalType, String principal, String perm, A return result.status; } else { logger.info("Start Updating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access/" + rules.get(0)); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "PUT", gson.toJson(permissions)); if (result.status == 400) { @@ -438,36 +439,25 @@ static class MakeRequestResponse { } - private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) + private MakeRequestResponse findDirectory(String directory, String clientToken, String globusEndpoint) throws MalformedURLException { URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path=" + directory + "/"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + clientToken, "GET", null); logger.info("find directory status:" + result.status); return result; } - public boolean giveGlobusPublicPermissions(String datasetId) + public boolean giveGlobusPublicPermissions(Dataset dataset) throws UnsupportedEncodingException, MalformedURLException { - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - if (globusEndpoint.equals("") || globusBasicToken.equals("")) { - return false; - } - AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); - if (clientTokenUser == null) { - logger.severe("Cannot 
get client token "); - return false; - } + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - String directory = getDirectory(datasetId); - logger.info(directory); - MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); + MakeRequestResponse status = findDirectory(endpoint.getBasePath(), endpoint.getClientToken(), endpoint.getId()); if (status.status == 200) { @@ -485,8 +475,7 @@ public boolean giveGlobusPublicPermissions(String datasetId) * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } */ - int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, - globusEndpoint); + int perStatus = givePermission("all_authenticated_users", "", "r", dataset); logger.info("givePermission status " + perStatus); if (perStatus == 409) { logger.info("Permissions already exist or limit was reached"); @@ -1287,4 +1276,55 @@ public String calculatemime(String fileName) throws InterruptedException { * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } * */ + + GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + Dataset dataset = null; + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + dataset = (Dataset) dvObject.getOwner(); + } else { + throw new IllegalArgumentException("Unsupported DvObject type: " + dvObject.getClass().getName()); + } + String driverId = dataset.getEffectiveStorageDriverId(); + GlobusEndpoint endpoint = null; + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); + + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.info("endpointWithBasePath: " + endpointWithBasePath); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); + logger.info("directoryPath: " + directoryPath); + + if (GlobusOverlayAccessIO.isDataverseManaged(driverId) && (dataset!=null)) { + directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); + logger.info("directoryPath now: " + directoryPath); + + } else { + //remote store - may have path in file storageidentifier + String relPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + int filenameStart = relPath.lastIndexOf("/") + 1; + if (filenameStart > 0) { + directoryPath = directoryPath + relPath.substring(0, filenameStart); + } + } + logger.info("directoryPath finally: " + directoryPath); + + String endpointId = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; + + logger.info("endpointId: " + endpointId); + + String globusToken = System.getProperty("dataverse.files." + driverId + ".globus-token"); + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); + + endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); + + return endpoint; + } + + private static boolean isDataverseManaged(String driverId) { + return Boolean.getBoolean("dataverse.files." 
+ driverId + ".managed"); + } } From 9d846d2455e820cc9312863079086c66b0799c7a Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Tue, 26 Sep 2023 09:13:13 +0200 Subject: [PATCH 043/414] fix: require ManageDatasetPermissions for listing role assignments on datasets --- .../engine/command/impl/ListRoleAssignments.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java index 1858ba377ab..b619d32cc7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java @@ -6,16 +6,18 @@ import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Collections; /** * * @author michael */ -@RequiredPermissions( Permission.ManageDataversePermissions ) +// no annotations here, since permissions are dynamically decided public class ListRoleAssignments extends AbstractCommand> { private final DvObject definitionPoint; @@ -34,5 +36,12 @@ public List execute(CommandContext ctxt) throws CommandException } return ctxt.permissions().assignmentsOn(definitionPoint); } + + @Override + public Map> getRequiredPermissions() { + return Collections.singletonMap("", + definitionPoint.isInstanceofDataset() ? 
Collections.singleton(Permission.ManageDatasetPermissions) + : Collections.singleton(Permission.ManageDataversePermissions)); + } } From 41e363e343861f6b416e6add60e60778f697cce0 Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Tue, 26 Sep 2023 09:13:36 +0200 Subject: [PATCH 044/414] test: require ManageDatasetPermissions for listing role assignments on datasets --- scripts/api/data/role-contributor-plus.json | 12 +++ .../harvard/iq/dataverse/api/DatasetsIT.java | 87 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 scripts/api/data/role-contributor-plus.json diff --git a/scripts/api/data/role-contributor-plus.json b/scripts/api/data/role-contributor-plus.json new file mode 100644 index 00000000000..ef9ba3aaff6 --- /dev/null +++ b/scripts/api/data/role-contributor-plus.json @@ -0,0 +1,12 @@ +{ + "alias":"contributorPlus", + "name":"ContributorPlus", + "description":"For datasets, a person who can edit License + Terms, then submit them for review, and add collaborators.", + "permissions":[ + "ViewUnpublishedDataset", + "EditDataset", + "DownloadFile", + "DeleteDatasetDraft", + "ManageDatasetPermissions" + ] +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 3b6d4d1ecdf..b51d400d2d4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1296,6 +1296,93 @@ public void testAddRoles(){ } + @Test + public void testListRoleAssignments() { + Response createAdminUser = UtilIT.createRandomUser(); + String adminUsername = UtilIT.getUsernameFromResponse(createAdminUser); + String adminApiToken = UtilIT.getApiTokenFromResponse(createAdminUser); + UtilIT.makeSuperUser(adminUsername); + + Response createDataverseResponse = UtilIT.createRandomDataverse(adminApiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // Now, let's allow anyone with a Dataverse account (any "random user") + // to create datasets in this dataverse: + + Response grantRole = UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR, AuthenticatedUsers.get().getIdentifier(), adminApiToken); + grantRole.prettyPrint(); + assertEquals(OK.getStatusCode(), grantRole.getStatusCode()); + + Response createContributorUser = UtilIT.createRandomUser(); + String contributorUsername = UtilIT.getUsernameFromResponse(createContributorUser); + String contributorApiToken = UtilIT.getApiTokenFromResponse(createContributorUser); + + // First, we test listing role assignments on a dataverse which requires "ManageDataversePermissions" + + Response notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); + + Response roleAssignmentsOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, adminApiToken); + roleAssignmentsOnDataverse.prettyPrint(); + assertEquals(OK.getStatusCode(), roleAssignmentsOnDataverse.getStatusCode()); + + // Second, we test listing role assignments on a dataset which requires "ManageDatasetPermissions" + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, contributorApiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + 
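        // Note: the assertions below exercise the dynamic permission logic introduced
        // in ListRoleAssignments.getRequiredPermissions() above; listing role
        // assignments on a dataset is expected to require ManageDatasetPermissions,
        // while listing them on a dataverse still requires ManageDataversePermissions.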
logger.info("dataset id: " + datasetId); + + Response datasetAsJson = UtilIT.nativeGet(datasetId, adminApiToken); + datasetAsJson.then().assertThat() + .statusCode(OK.getStatusCode()); + + String identifier = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier"); + assertEquals(10, identifier.length()); + + String protocol1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.protocol"); + String authority1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.authority"); + String identifier1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier"); + String datasetPersistentId = protocol1 + ":" + authority1 + "/" + identifier1; + + Response notPermittedToListRoleAssignmentOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataset.getStatusCode()); + + // We create a new role that includes "ManageDatasetPermissions" which are required for listing role assignments + // of a dataset and assign it to the contributor user + + String pathToJsonFile = "scripts/api/data/role-contributor-plus.json"; + Response addDataverseRoleResponse = UtilIT.addDataverseRole(pathToJsonFile, dataverseAlias, adminApiToken); + addDataverseRoleResponse.prettyPrint(); + String body = addDataverseRoleResponse.getBody().asString(); + String status = JsonPath.from(body).getString("status"); + assertEquals("OK", status); + + Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "contributorPlus", "@" + contributorUsername, adminApiToken); + giveRandoPermission.prettyPrint(); + assertEquals(200, giveRandoPermission.getStatusCode()); + + // Contributor user should now be able to list dataset role assignments as well + + Response roleAssignmentsOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); + roleAssignmentsOnDataset.prettyPrint(); + assertEquals(OK.getStatusCode(), roleAssignmentsOnDataset.getStatusCode()); + + // ...but not dataverse role assignments + + notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); + + // Finally, we clean up and delete the role we created + + Response deleteDataverseRoleResponse = UtilIT.deleteDataverseRole("contributorPlus", adminApiToken); + deleteDataverseRoleResponse.prettyPrint(); + body = deleteDataverseRoleResponse.getBody().asString(); + status = JsonPath.from(body).getString("status"); + assertEquals("OK", status); + } + @Test public void testFileChecksum() { From 794045fbc0f43f0cebccc9ebb4f4b234a6c2215a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 3 Oct 2023 17:03:38 -0400 Subject: [PATCH 045/414] update auth checks and err handling --- .../harvard/iq/dataverse/api/Datasets.java | 41 +++++++++++++++---- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 110bfcc1553..25839544ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -65,6 +65,7 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import 
edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -3429,8 +3430,12 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar // ------------------------------------- // (1) Get the user from the ContainerRequestContext // ------------------------------------- - User authUser; - authUser = getRequestUser(crc); + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } // ------------------------------------- // (2) Get the Dataset Id @@ -3442,14 +3447,32 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar } catch (WrappedResponse wr) { return wr.getResponse(); } - - JsonObject params = JsonUtil.getJsonObject(jsonBody); - String principal = params.getString("principal"); - // Async Call - globusService.givePermission("identity", principal, "rw", dataset); - - return ok("Permission Granted"); + if(!GlobusOverlayAccessIO.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + return badRequest("This dataset does not have managed Globus storage"); + } + + if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) + .canIssue(UpdateDatasetVersionCommand.class)) { + + JsonObject params = JsonUtil.getJsonObject(jsonBody); + String principal = params.getString("principal"); + + // Async Call + int status = globusService.givePermission("identity", principal, "rw", dataset); + switch (status) { + case 201: + return ok("Permission Granted"); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + } else { + return forbidden("User doesn't have permission to upload to this dataset"); + } } From c724094dcfffaa83c61f415d572e2e5a8958cef0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 3 Oct 2023 17:03:57 -0400 Subject: [PATCH 046/414] fix constructor, reformat --- .../iq/dataverse/globus/GlobusEndpoint.java | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java index d1e5d19a592..7e555935e2e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -5,27 +5,34 @@ public class GlobusEndpoint { private String id; private String clientToken; private String basePath; - - + public GlobusEndpoint(String id, String clientToken, String basePath) { - + this.id = id; + this.clientToken = clientToken; + this.basePath = basePath; } - public String getId() { + + public String getId() { return id; } - public void setId(String id) { + + public void setId(String id) { this.id = id; } - public String getClientToken() { + + public String getClientToken() { return clientToken; } - public void setClientToken(String clientToken) { + + public void setClientToken(String clientToken) { this.clientToken = clientToken; } - public String getBasePath() { + + public String getBasePath() { return basePath; } - public void setBasePath(String basePath) { + + public void setBasePath(String basePath) { this.basePath = basePath; } } \ No newline at end of file From 
ed87e0640788278b5af838ba98efd72413d2586d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 3 Oct 2023 17:04:51 -0400 Subject: [PATCH 047/414] start to monitor access rule changes --- .../dataverse/globus/GlobusServiceBean.java | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 910ee796e0e..ad20b90971b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -206,10 +206,21 @@ public int givePermission(String principalType, String principal, String perm, D result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); - if (result.status == 400) { + switch (result.status) { + case 400: + logger.severe("Path " + permissions.getPath() + " is not valid"); - } else if (result.status == 409) { + break; + case 409: logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + break; + case 201: + JsonObject response = JsonUtil.getJsonObject(result.jsonResponse); + if (response != null && response.containsKey("access_id")) { + permissions.setId(response.getString("access_id")); + monitorTemporaryPermissions(permissions, endpoint); + logger.info("Access rule " + permissions.getId() + " was created successfully"); + } } return result.status; @@ -226,9 +237,13 @@ public int givePermission(String principalType, String principal, String perm, D logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); } logger.info("Result status " + result.status); + return result.status; } + } - return result.status; + private void monitorTemporaryPermissions(Permissions permissions, GlobusEndpoint endpoint) { + // TODO Auto-generated method stub + } public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { @@ -324,6 +339,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a // Basic // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 logger.info(authType + " " + authCode); + logger.info("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -333,6 +349,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a connection.setRequestProperty("Accept", "application/json"); logger.info(jsonString); connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); wr.write(jsonString); wr.flush(); @@ -1318,7 +1335,7 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); - +logger.info("clientToken: " + clientToken); endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); return endpoint; From 4c67f2a636699d51589fa815511ce4e1b3dc9d1f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 6 Oct 2023 12:13:19 -0400 Subject: [PATCH 048/414] remove inefficient bucket check --- .../iq/dataverse/dataaccess/S3AccessIO.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 822ada0b83e..22216ee5c2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -108,14 +108,13 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) { logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified"); } - //Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened. - try { - if (bucketName == null || !s3.doesBucketExistV2(bucketName)) { - throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets."); - } - } catch (SdkClientException sce) { - throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage()); - } + + // FWIW: There used to be a check here to see if the bucket exists. + // It was very redundant (checking every time we access any file) and didn't do + // much but potentially make the failure (in the unlikely case a bucket doesn't + // exist/just disappeared) happen slightly earlier (here versus at the first + // file/metadata access). + } catch (Exception e) { throw new AmazonClientException( "Cannot instantiate a S3 client; check your AWS credentials and region", From 90dfa42c9090ce9e4cf9dab1e8ed57776137a077 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 11 Oct 2023 16:41:38 -0400 Subject: [PATCH 049/414] Redesigned provider mechanism --- .../iq/dataverse/dataaccess/S3AccessIO.java | 71 ++++++++++++++----- 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 22216ee5c2b..ee04bbcb853 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -4,6 +4,7 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; @@ -57,9 +58,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Optional; import java.util.Random; import java.util.function.Predicate; import java.util.logging.Logger; @@ -1180,29 +1183,59 @@ private static AmazonS3 getClient(String driverId) { // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. + /** Configure credentials for the S3 client. There are multiple mechanisms available. + * Role-based/instance credentials are globally defined while the other mechanisms (profile, static) + * are defined per store. 
The logic below ensures that
+     * * if a store-specific profile or static credentials are explicitly set, they will be used in preference to the global role-based credentials.
+     * * if store-specific role-based credentials are explicitly set, they will be used in preference to the global instance credentials,
+     * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and
+     * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found.
+     */
-        String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default");
-        ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile);
-
-        // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env
-        // vars or system properties to provide these, but use the secrets config source provided by Payara.
-        AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(
-            new BasicAWSCredentials(
-                config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class).orElse(""),
-                config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class).orElse("")
-            ));
-
-        //Add role-based provider as in the default provider chain
-        InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance();
+        ArrayList<AWSCredentialsProvider> providers = new ArrayList<>();
+
+        String s3profile = System.getProperty("dataverse.files." + driverId + ".profile");
+        boolean allowInstanceCredentials = true;
+        // Assume that instance credentials should not be used if the profile is
+        // actually set for this store or if static creds are provided (below).
+        if (s3profile != null) {
+            allowInstanceCredentials = false;
+        }
+        // Try to retrieve credentials via Microprofile Config API, too. For production
+        // use, you should not use env vars or system properties to provide these, but
+        // use the secrets config source provided by Payara.
+        Optional<String> accessKey = config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class);
+        Optional<String> secretKey = config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class);
+        if (accessKey.isPresent() && secretKey.isPresent()) {
+            allowInstanceCredentials = false;
+            AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(
+                    new BasicAWSCredentials(
+                            accessKey.orElse(""),
+                            secretKey.orElse("")));
+            providers.add(staticCredentials);
+        } else if (s3profile == null) {
+            //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred).
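            // Illustrative sketch of the resulting chain order (first match wins),
            // assuming the providers list built here is reversed below before the
            // AWSCredentialsProviderChain is constructed:
            //   neither profile nor static keys set: [instance/role creds, "default" profile]
            //   profile only:                        [named profile]
            //   static keys only:                    [static credentials]
            //   profile and static keys:             [named profile, static credentials]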
+ s3profile = "default"; + } + if (s3profile != null) { + ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); + providers.add(profileCredentials); + } + + if (allowInstanceCredentials) { + // Add role-based provider as in the default provider chain + InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); + providers.add(instanceCredentials); + } // Add all providers to chain - the first working provider will be used - // (role-based is first in the default cred provider chain, so we're just + // (role-based is first in the default cred provider chain (if no profile or + // static creds are explicitly set for the store), so we're just // reproducing that, then profile, then static credentials as the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(instanceCredentials, profileCredentials, staticCredentials); + + // As the order is the reverse of how we added providers, we reverse the list here + Collections.reverse(providers); + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providers); s3CB.setCredentials(providerChain); - + // let's build the client :-) AmazonS3 client = s3CB.build(); driverClientMap.put(driverId, client); From dcca52566958fba3f58698766f9696723fcebfc0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 Oct 2023 09:28:42 -0400 Subject: [PATCH 050/414] Good cleanup Co-authored-by: Oliver Bertuch --- .../harvard/iq/dataverse/dataaccess/S3AccessIO.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index ee04bbcb853..a66686ac648 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1209,22 +1209,20 @@ private static AmazonS3 getClient(String driverId) { allowInstanceCredentials = false; AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( new BasicAWSCredentials( - accessKey.orElse(""), - secretKey.orElse(""))); + accessKey.get(), + secretKey.get())); providers.add(staticCredentials); } else if (s3profile == null) { //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred). s3profile = "default"; } if (s3profile != null) { - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); - providers.add(profileCredentials); + providers.add(new ProfileCredentialsProvider(s3profile)); } if (allowInstanceCredentials) { // Add role-based provider as in the default provider chain - InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); - providers.add(instanceCredentials); + providers.add(InstanceProfileCredentialsProvider.getInstance()); } // Add all providers to chain - the first working provider will be used // (role-based is first in the default cred provider chain (if no profile or From 4ad95697405512c16ec42b1d242ce620aec2436a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 Oct 2023 16:32:13 -0400 Subject: [PATCH 051/414] partial changes for permission mgmt, etc. 
--- .../harvard/iq/dataverse/api/Datasets.java | 33 ++- .../dataverse/globus/GlobusServiceBean.java | 218 +++++++++++------- .../iq/dataverse/settings/JvmSettings.java | 1 + 3 files changed, 155 insertions(+), 97 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 25839544ce9..d3ea1b80696 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3363,6 +3363,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } //------------------------------------ // (2b) Make sure dataset does not have package file @@ -3396,7 +3405,7 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call - globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); return ok("Async call to Globus Upload started "); @@ -3414,9 +3423,10 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, */ @POST @AuthRequired - @Path("{id}/allowGlobusUpload") + @Path("{id}/requestGlobusTransferPaths") @Consumes(MediaType.APPLICATION_JSON) - public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody + @Produces(MediaType.APPLICATION_JSON) + public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody ) throws IOException, ExecutionException, InterruptedException { @@ -3454,15 +3464,18 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) .canIssue(UpdateDatasetVersionCommand.class)) { - + try { JsonObject params = JsonUtil.getJsonObject(jsonBody); String principal = params.getString("principal"); + int numberOfPaths = params.getInt("numberOfFiles"); + if(numberOfPaths <=0) { + return badRequest("numberOfFiles must be positive"); + } - // Async Call - int status = globusService.givePermission("identity", principal, "rw", dataset); - switch (status) { + JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); + switch (response.getInt("status")) { case 201: - return ok("Permission Granted"); + return ok(response.getJsonArray("paths")); case 400: return badRequest("Unable to grant permission"); case 409: @@ -3470,6 +3483,10 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar default: return error(null, "Unexpected error when granting permission"); } + } catch (NullPointerException|ClassCastException e) { + return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); + + } } else { return forbidden("User doesn't have permission to upload to this dataset"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index ad20b90971b..49572519696 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1,7 +1,11 @@ package edu.harvard.iq.dataverse.globus; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; +import com.nimbusds.oauth2.sdk.pkce.CodeVerifier; + import edu.harvard.iq.dataverse.*; import jakarta.ejb.Asynchronous; @@ -15,7 +19,9 @@ import jakarta.json.JsonArray; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import jakarta.json.JsonValue; import jakarta.servlet.http.HttpServletRequest; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -29,6 +35,8 @@ import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -48,6 +56,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -107,8 +116,10 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - private ArrayList checkPermissions(GlobusEndpoint endpoint, String principalType, String principal) throws MalformedURLException { + private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) throws MalformedURLException { + String principalType="identity"; + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); @@ -118,20 +129,22 @@ private ArrayList checkPermissions(GlobusEndpoint endpoint, String princ for (int i = 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) - && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { - ids.add(pr.getId()); + && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) + &&pr.getPermissions().equals(permissions)) { + return pr.getId(); } else { - logger.info(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); + logger.fine(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } } - - return ids; + return null; } -/* + + /* public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { if (directory != null && !directory.equals("")) { @@ -165,47 +178,71 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin } } */ - public void deletePermission(String ruleId, Logger globusLogger) throws MalformedURLException { - - if (ruleId.length() > 0) { - AccessToken clientTokenUser = 
getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); - - globusLogger.info("Start deleting permissions."); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - - URL url = new URL( - "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); - if (result.status != 200) { - globusLogger.warning("Cannot delete access rule " + ruleId); - } else { - globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + +/** Deletes a Globus rule associated with the specified dataset. + * + * @param ruleId - Globus rule id - assumed to be associated with the dataset's file path (should not be called with a user-specified rule id w/o further checking) + * @param dataset - the dataset associated with the rule + * @param globusLogger - a separate logger instance, may be null + */ +public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { + + if (ruleId.length() > 0) { + if (dataset != null) { + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + if (endpoint != null) { + String accessToken = endpoint.getClientToken(); + if (globusLogger != null) { + globusLogger.info("Start deleting permissions."); + } + try { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "DELETE", null); + if (result.status != 200) { + if (globusLogger != null) { + globusLogger.warning("Cannot delete access rule " + ruleId); + } else { + // When removed due to a cache ejection, we don't have a globusLogger + logger.warning("Cannot delete access rule " + ruleId); + } + } else { + if (globusLogger != null) { + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + } + } + } catch (MalformedURLException ex) { + logger.log(Level.WARNING, + "Failed to delete access rule " + ruleId + " on endpoint " + endpoint.getId(), ex); + } } } - } +} - public int givePermission(String principalType, String principal, String perm, Dataset dataset) throws MalformedURLException { + public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - ArrayList rules = checkPermissions(endpoint, principalType, principal); + String principalType= "identity"; Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); permissions.setPath(endpoint.getBasePath() + "/"); - permissions.setPermissions(perm); + permissions.setPermissions("rw"); Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; - if (rules.size() == 0) { logger.info("Start creating the rule"); + JsonObjectBuilder response = Json.createObjectBuilder(); + + try { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); + response.add("status", result.status); switch (result.status) { case 400: @@ -215,35 +252,50 @@ public int givePermission(String principalType, String principal, String perm, D logger.warning("ACL already exists or Endpoint ACL already has the maximum number of
access rules"); break; case 201: - JsonObject response = JsonUtil.getJsonObject(result.jsonResponse); - if (response != null && response.containsKey("access_id")) { - permissions.setId(response.getString("access_id")); - monitorTemporaryPermissions(permissions, endpoint); + JsonObject globusResponse = JsonUtil.getJsonObject(result.jsonResponse); + if (globusResponse != null && globusResponse.containsKey("access_id")) { + permissions.setId(globusResponse.getString("access_id")); + monitorTemporaryPermissions(permissions.getId(), dataset.getId()); logger.info("Access rule " + permissions.getId() + " was created successfully"); + JsonArrayBuilder pathArray = Json.createArrayBuilder(); + for(int i=0;i rulesCache = Caffeine.newBuilder() + .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .removalListener((ruleId, datasetId, cause) -> { + //Delete rules that expire + Dataset dataset = datasetSvc.find(datasetId); + deletePermission((String) ruleId, dataset, null); + }) + + .build(); + + + private void monitorTemporaryPermissions(String ruleId, long datasetId) { + rulesCache.put(ruleId, datasetId); } public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { @@ -468,6 +520,7 @@ private MakeRequestResponse findDirectory(String directory, String clientToken, return result; } + /* public boolean giveGlobusPublicPermissions(Dataset dataset) throws UnsupportedEncodingException, MalformedURLException { @@ -478,20 +531,6 @@ public boolean giveGlobusPublicPermissions(Dataset dataset) if (status.status == 200) { - /* - * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); - * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file: - * files) { if (!file.getName().contains("cached") && - * !file.getName().contains(".thumb")) { int perStatus = - * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory - * + "/" + file.getName(), globusEndpoint); logger.info("givePermission status " - * + perStatus + " for " + file.getName()); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached for " + - * file.getName()); } else if (perStatus == 400) { - * logger.info("No file in Globus " + file.getName()); } else if (perStatus != - * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } - */ - int perStatus = givePermission("all_authenticated_users", "", "r", dataset); logger.info("givePermission status " + perStatus); if (perStatus == 409) { @@ -512,7 +551,8 @@ public boolean giveGlobusPublicPermissions(Dataset dataset) return true; } - +*/ + // Generates the URL to launch the Globus app public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); @@ -572,7 +612,7 @@ public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { Integer countAll = 0; @@ -606,33 +646,33 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // ToDo - use DataAccess methods? 
//String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + + logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); - Thread.sleep(5000); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); - } - logger.info("json: " + JsonUtil.prettyPrint(jsonObject)); - - String taskIdentifier = jsonObject.getString("taskIdentifier"); + String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = ""; - try { - ruleId = jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - logger.warning("NPE for jsonData object"); - } + String ruleId = null; + Thread.sleep(5000); + // globus task status check GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + + ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + if(ruleId!=null) { + Long datasetId = rulesCache.getIfPresent(ruleId); + if(datasetId!=null) { + + //Will delete rule + rulesCache.invalidate(ruleId); + } else { + //The cache already expired this rule, in which case its delay was not long enough, or we have some other problem + logger.warning("Rule " + ruleId + " not found in rulesCache"); + deletePermission(ruleId, dataset, globusLogger); + } } // If success, switch to an EditInProgress lock - do this before removing the @@ -674,7 +714,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + JsonArray filesJsonArray = jsonData.getJsonArray("files"); if (filesJsonArray != null) { String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); @@ -905,7 +945,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermission(ruleId, globusLogger); + deletePermission(ruleId, dataset, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index b4807372b69..f8abe505dca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -48,6 +48,7 @@ public enum JvmSettings { SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), + GLOBUS_RULES_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), FILES(SCOPE_FILES), BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), From 30395309689949a3fc633e3be5fa4c30cc1f27cd Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 Oct 2023 16:33:02 -0400 Subject: [PATCH 052/414] check driver type not id --- .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index f4cc7d40120..3bc83538679 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -606,7 +606,7 @@ public static String getDriverPrefix(String driverId) { } public static boolean isDirectUploadEnabled(String driverId) { - return (DataAccess.S3.equals(driverId) && Boolean.parseBoolean(System.getProperty("dataverse.files." + DataAccess.S3 + ".upload-redirect"))) || + return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) || Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); } From 48144a24cb200e285b5419ab29865293eac17e54 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 16 Oct 2023 17:00:51 -0400 Subject: [PATCH 053/414] adding extra logic to skip things like facets and highlights in searches, unless specifically requested. (#9635) --- .../search/SearchIncludeFragment.java | 91 ++++-- .../dataverse/search/SearchServiceBean.java | 308 +++++++++++------- 2 files changed, 249 insertions(+), 150 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 2ce06541afa..1e42958fe4e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -120,7 +120,6 @@ public class SearchIncludeFragment implements java.io.Serializable { private Long facetCountDatasets = 0L; private Long facetCountFiles = 0L; Map previewCountbyType = new HashMap<>(); - private SolrQueryResponse solrQueryResponseAllTypes; private String sortField; private SortOrder sortOrder; private String currentSort; @@ -132,6 +131,7 @@ public class SearchIncludeFragment implements java.io.Serializable { Map datasetfieldFriendlyNamesBySolrField = new HashMap<>(); Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); private boolean solrIsDown = false; + private boolean solrIsOverloaded = false; private Map numberOfFacets = new HashMap<>(); // private boolean showUnpublished; List filterQueriesDebug = new ArrayList<>(); @@ -279,6 +279,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; + SolrQueryResponse solrQueryResponseSecondPass = null; List filterQueriesFinal = new ArrayList<>(); @@ -311,18 +312,11 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - List filterQueriesFinalAllTypes = new ArrayList<>(); - String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); - selectedTypesHumanReadable = combine(arr, " OR "); - if (!selectedTypesHumanReadable.isEmpty()) { - typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } + filterQueriesFinal.addAll(filterQueries); - filterQueriesFinalAllTypes.addAll(filterQueriesFinal); - String allTypesFilterQuery = SearchFields.TYPE + ":(dataverses OR datasets OR files)"; - filterQueriesFinalAllTypes.add(allTypesFilterQuery); + filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -363,10 +357,60 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused 
// This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) // To get the counts we display in the types facets particularly for unselected types - SEK 08/25/2021 - solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); - if (solrQueryResponse.hasError()){ - logger.info(solrQueryResponse.getError()); - setSolrErrorEncountered(true); + // Sure, but we should not waste resources here. We will try to save + // solr some extra work and a) only run this second query IF there are + // one or more unselected type facets; and b) drop all the extra + // parameters from this second query - such as facets and highlights - + // that we do not actually need for the purposes of finding these + // extra numbers. -- L.A. 10/16/2023 + + // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 + previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), -1L); + previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), -1L); + previewCountbyType.put(BundleUtil.getStringFromBundle("files"), -1L); + + + // This will populate the type facet counts for the types that are + // currently selected on the collection page: + for (FacetCategory facetCategory : solrQueryResponse.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + + if (selectedTypesList.size() < 3) { + // If some types are NOT currently selected, we will need to + // run another query to obtain the numbers of the unselected types: + + List filterQueriesFinalSecondPass = new ArrayList<>(); + filterQueriesFinalSecondPass.addAll(filterQueriesFinal); + + List selectedTypesListSecondPass = new ArrayList<>(); + + for (String dvObjectType : previewCountbyType.keySet()) { + if (previewCountbyType.get(dvObjectType) == -1) { + selectedTypesListSecondPass.add(dvObjectType); + } + } + + String[] arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); + filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); + + if (solrQueryResponseSecondPass != null) { + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); + if (solrQueryResponseSecondPass.hasError()) { + logger.info(solrQueryResponse.getError()); + setSolrErrorEncountered(true); + } + + // And now populate the remaining type facets: + for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + } } } catch (SearchException ex) {
solrQueryResponseAllTypes.getTypeFacetCategories()) { - for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); - } - } - } setDisplayCardValues(); @@ -1020,6 +1053,14 @@ public boolean isSolrIsDown() { public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } + + public boolean isSolrOverloaded() { + return solrIsOverloaded; + } + + public void setSolrIsOverloaded(boolean solrIsOverloaded) { + this.solrIsOverloaded = solrIsOverloaded; + } public boolean isRootDv() { return rootDv; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 44976d232c2..aa2948eb8cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -101,7 +101,7 @@ public class SearchServiceBean { public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null); } - + /** * Import note: "onlyDatatRelatedToMe" relies on filterQueries for providing * access to Private Data for the correct user @@ -122,6 +122,41 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, + String query, + List filterQueries, + String sortField, + String sortOrder, + int paginationStart, + boolean onlyDatatRelatedToMe, + int numResultsPerPage, + boolean retrieveEntities, + String geoPoint, + String geoRadius) throws SearchException { + return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null, true, true); + } + + /** + * @param dataverseRequest + * @param dataverses + * @param query + * @param filterQueries + * @param sortField + * @param sortOrder + * @param paginationStart + * @param onlyDatatRelatedToMe + * @param numResultsPerPage + * @param retrieveEntities - look up dvobject entities with .find() (potentially expensive!) + * @param geoPoint e.g. "35,15" + * @param geoRadius e.g. "5" + * @param addFacets boolean + * @param addHighlights boolean * @return * @throws SearchException */ @@ -136,7 +171,9 @@ public SolrQueryResponse search( int numResultsPerPage, boolean retrieveEntities, String geoPoint, - String geoRadius + String geoRadius, + boolean addFacets, + boolean addHighlights ) throws SearchException { if (paginationStart < 0) { @@ -157,56 +194,62 @@ public SolrQueryResponse search( // solrQuery.setSort(sortClause); // } // solrQuery.setSort(sortClause); - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); + + List datasetFields = datasetFieldService.findAllOrderedById(); Map solrFieldsToHightlightOnMap = new HashMap<>(); - // TODO: Do not hard code "Name" etc as English here. 
- solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are not - * being shown on the dataverse cards. See also - * https://github.com/IQSS/dataverse/issues/1431 - */ + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. 
+ solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - List datasetFields = datasetFieldService.findAllOrderedById(); - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); + /** + * @todo: show highlight on file card? 
+ * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } } + solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); @@ -214,6 +257,8 @@ public SolrQueryResponse search( * @todo: do we need facet.query? */ solrQuery.setParam("facet.query", "*"); + solrQuery.addFacetField(SearchFields.TYPE); // this one is always performed + for (String filterQuery : filterQueries) { solrQuery.addFilterQuery(filterQuery); } @@ -223,70 +268,73 @@ public SolrQueryResponse search( // See https://solr.apache.org/guide/8_11/spatial-search.html#bbox solrQuery.addFilterQuery("{!bbox sfield=" + SearchFields.GEOLOCATION + "}"); } + + List metadataBlockFacets = new LinkedList<>(); - // ----------------------------------- - // Facets to Retrieve - // ----------------------------------- - solrQuery.addFacetField(SearchFields.METADATA_TYPES); -// solrQuery.addFacetField(SearchFields.HOST_DATAVERSE); -// solrQuery.addFacetField(SearchFields.AUTHOR_STRING); - solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); - solrQuery.addFacetField(SearchFields.METADATA_SOURCE); -// solrQuery.addFacetField(SearchFields.AFFILIATION); - solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); -// solrQuery.addFacetField(SearchFields.CATEGORY); -// solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME); -// solrQuery.addFacetField(SearchFields.DISTRIBUTOR); -// solrQuery.addFacetField(SearchFields.KEYWORD); - /** - * @todo when a new method on datasetFieldService is available - * (retrieveFacetsByDataverse?) only show the facets that the dataverse - * in question wants to show (and in the right order): - * https://redmine.hmdc.harvard.edu/issues/3490 - * - * also, findAll only returns advancedSearchField = true... we should - * probably introduce the "isFacetable" boolean rather than caring about - * if advancedSearchField is true or false - * - */ + if (addFacets) { + // ----------------------------------- + // Facets to Retrieve + // ----------------------------------- + solrQuery.addFacetField(SearchFields.METADATA_TYPES); + solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); + solrQuery.addFacetField(SearchFields.METADATA_SOURCE); + solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); + /** + * @todo when a new method on datasetFieldService is available + * (retrieveFacetsByDataverse?) only show the facets that the + * dataverse in question wants to show (and in the right order): + * https://redmine.hmdc.harvard.edu/issues/3490 + * + * also, findAll only returns advancedSearchField = true... 
we + * should probably introduce the "isFacetable" boolean rather than + * caring about if advancedSearchField is true or false + * + */ - List metadataBlockFacets = new LinkedList<>(); + if (dataverses != null) { + for (Dataverse dataverse : dataverses) { + if (dataverse != null) { + for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { + DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); + solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); + } + // Get all metadata block facets configured to be displayed + metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); + } + } + } + + solrQuery.addFacetField(SearchFields.FILE_TYPE); + /** + * @todo: hide the extra line this shows in the GUI... at least it's + * last... + */ + solrQuery.addFacetField(SearchFields.FILE_TAG); + if (!systemConfig.isPublicInstall()) { + solrQuery.addFacetField(SearchFields.ACCESS); + } + } + + //I'm not sure if just adding null here is good for hte permissions system... i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { // ----------------------------------- // PERMISSION FILTER QUERY // ----------------------------------- - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } - if (dataverse != null) { - for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { - DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); - solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); - } - // Get all metadata block facets configured to be displayed - metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); - } } } else { - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } } - solrQuery.addFacetField(SearchFields.FILE_TYPE); - /** - * @todo: hide the extra line this shows in the GUI... at least it's - * last... - */ - solrQuery.addFacetField(SearchFields.TYPE); - solrQuery.addFacetField(SearchFields.FILE_TAG); - if (!systemConfig.isPublicInstall()) { - solrQuery.addFacetField(SearchFields.ACCESS); - } + /** * @todo: do sanity checking... throw error if negative */ @@ -416,34 +464,44 @@ public SolrQueryResponse search( Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID); List matchedFields = new ArrayList<>(); - List highlights = new ArrayList<>(); - Map highlightsMap = new HashMap<>(); - Map> highlightsMap2 = new HashMap<>(); - Map highlightsMap3 = new HashMap<>(); - if (queryResponse.getHighlighting().get(id) != null) { - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String field = entry.getKey(); - String displayName = entry.getValue(); - - List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); - if (highlightSnippets != null) { - matchedFields.add(field); - /** - * @todo only SolrField.SolrType.STRING? that's not - * right... 
knit the SolrField object more into the - * highlighting stuff - */ - SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); - Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); - highlights.add(highlight); - highlightsMap.put(solrField, highlight); - highlightsMap2.put(solrField, highlightSnippets); - highlightsMap3.put(field, highlight); + + SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + if (addHighlights) { + List highlights = new ArrayList<>(); + Map highlightsMap = new HashMap<>(); + Map> highlightsMap2 = new HashMap<>(); + Map highlightsMap3 = new HashMap<>(); + if (queryResponse.getHighlighting().get(id) != null) { + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String field = entry.getKey(); + String displayName = entry.getValue(); + + List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); + if (highlightSnippets != null) { + matchedFields.add(field); + /** + * @todo only SolrField.SolrType.STRING? that's not + * right... knit the SolrField object more into the + * highlighting stuff + */ + SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); + Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); + highlights.add(highlight); + highlightsMap.put(solrField, highlight); + highlightsMap2.put(solrField, highlightSnippets); + highlightsMap3.put(field, highlight); + } } + } + solrSearchResult.setHighlightsAsList(highlights); + solrSearchResult.setHighlightsMap(highlightsMap); + solrSearchResult.setHighlightsAsMap(highlightsMap3); } - SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + /** * @todo put all this in the constructor? */ @@ -470,9 +528,7 @@ public SolrQueryResponse search( solrSearchResult.setNameSort(nameSort); solrSearchResult.setReleaseOrCreateDate(release_or_create_date); solrSearchResult.setMatchedFields(matchedFields); - solrSearchResult.setHighlightsAsList(highlights); - solrSearchResult.setHighlightsMap(highlightsMap); - solrSearchResult.setHighlightsAsMap(highlightsMap3); + Map parent = new HashMap<>(); String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION); solrSearchResult.setDescriptionNoSnippet(description); @@ -863,7 +919,7 @@ public String getCapitalizedName(String name) { * * @return */ - private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe) { + private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) { User user = dataverseRequest.getUser(); if (user == null) { @@ -922,9 +978,11 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ AuthenticatedUser au = (AuthenticatedUser) user; - // Logged in user, has publication status facet - // - solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + if (addFacets) { + // Logged in user, has publication status facet + // + solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + } // ---------------------------------------------------- // (3) Is this a Super User? 
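To make the facet strategy of this patch concrete, here is a minimal SolrJ sketch of the lightweight second pass (illustrative only: the dvObjectType field name, the client wiring, and the method shape are assumptions, not the Dataverse code, which goes through SearchServiceBean and SearchFields.TYPE):

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.response.FacetField;
    import org.apache.solr.client.solrj.response.QueryResponse;

    // Second pass: counts only for the unselected types -- no rows, no
    // highlighting, and no facets beyond the single one we actually need.
    static Map<String, Long> countUnselectedTypes(SolrClient solr, String query,
            String unselectedTypesFilter) throws SolrServerException, IOException {
        SolrQuery q = new SolrQuery(query);
        q.addFilterQuery("dvObjectType:(" + unselectedTypesFilter + ")"); // assumed field
        q.setRows(0);      // we only want facet counts, not documents
        q.setFacet(true);
        q.addFacetField("dvObjectType");
        QueryResponse rsp = solr.query(q);
        Map<String, Long> counts = new HashMap<>();
        for (FacetField.Count c : rsp.getFacetField("dvObjectType").getValues()) {
            counts.put(c.getName(), c.getCount());
        }
        return counts; // e.g. {datasets=1200, files=56000}
    }

The first pass keeps its facets and highlights for the types actually displayed; only the missing counts (at most two of the three types) pay for the extra round trip.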
From 6307292d3858bd62144e313de1b5574b55b4fb36 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 13:07:14 -0400 Subject: [PATCH 054/414] more fixes/cleanup #9635 --- .../search/SearchIncludeFragment.java | 27 ++-- .../dataverse/search/SearchServiceBean.java | 127 ++++++++++-------- 2 files changed, 90 insertions(+), 64 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 1e42958fe4e..958ac0151c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -308,15 +308,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } + filterQueriesFinal.addAll(filterQueries); + + /** + * Add type queries, for the types (Dataverses, Datasets, Datafiles) + * currently selected: + */ selectedTypesList = new ArrayList<>(); String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - - - filterQueriesFinal.addAll(filterQueries); - + logger.info("selected types list size: "+selectedTypesList.size()); + String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); + selectedTypesHumanReadable = combine(arr, " OR "); + if (!selectedTypesHumanReadable.isEmpty()) { + typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; + } filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -383,7 +391,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // run another query to obtain the numbers of the unselected types: List filterQueriesFinalSecondPass = new ArrayList<>(); - filterQueriesFinalSecondPass.addAll(filterQueriesFinal); + filterQueriesFinalSecondPass.addAll(filterQueries); List selectedTypesListSecondPass = new ArrayList<>(); @@ -393,12 +401,13 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - String[] arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); + arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, false, false); + if (solrQueryResponseSecondPass != null) { - solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); if (solrQueryResponseSecondPass.hasError()) { logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); @@ -410,6 +419,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } + } else { + logger.warning("null solr response from the second pass type query"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index aa2948eb8cb..d3ff7e42d15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -195,64 +195,11 @@ public SolrQueryResponse search( // } // solrQuery.setSort(sortClause); - List datasetFields = datasetFieldService.findAllOrderedById(); - Map solrFieldsToHightlightOnMap = new HashMap<>(); - if (addHighlights) { - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); - - // TODO: Do not hard code "Name" etc as English here. - solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are - * not being shown on the dataverse cards. See also - * https://github.com/IQSS/dataverse/issues/1431 - */ -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); - } - } solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); + /** * @todo: do we need facet.query? 
*/ @@ -315,7 +262,61 @@ public SolrQueryResponse search( } } - + List datasetFields = datasetFieldService.findAllOrderedById(); + Map solrFieldsToHightlightOnMap = new HashMap<>(); + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. + solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); + /** + * @todo: show highlight on file card? + * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } + } + //I'm not sure if just adding null here is good for the permissions system...
I think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { @@ -370,7 +371,7 @@ public SolrQueryResponse search( // solrQuery.addNumericRangeFacet(SearchFields.PRODUCTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); // solrQuery.addNumericRangeFacet(SearchFields.DISTRIBUTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); solrQuery.setRows(numResultsPerPage); - logger.fine("Solr query:" + solrQuery); + logger.info("Solr query:" + solrQuery); // ----------------------------------- // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; try { queryResponse = solrClientService.getSolrClient().query(solrQuery); + } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); + + logger.info("message from solr exception: "+messageFromSolr); + String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; if (messageFromSolr.startsWith(stringToHide)) { @@ -393,6 +398,12 @@ public SolrQueryResponse search( exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error" - could be anything else too - ? + + + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); @@ -408,6 +419,10 @@ public SolrQueryResponse search( } catch (SolrServerException | IOException ex) { throw new SearchException("Internal Dataverse Search Engine Error", ex); } + + int statusCode = queryResponse.getStatus(); + + logger.info("status code of the query response: "+statusCode); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); From 74eb7c551d209c9e460cbaea5572004b0fcad0bc Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 16:09:32 -0400 Subject: [PATCH 055/414] more fixes (#9635) --- .../search/SearchIncludeFragment.java | 24 +++++++++++++++---- .../dataverse/search/SearchServiceBean.java | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 958ac0151c6..177186fce49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -395,9 +395,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused List selectedTypesListSecondPass = new ArrayList<>(); - for (String dvObjectType : previewCountbyType.keySet()) { - if (previewCountbyType.get(dvObjectType) == -1) { - selectedTypesListSecondPass.add(dvObjectType); + // @todo: simplify this!
+ for (String dvObjectTypeLabel : previewCountbyType.keySet()) { + if (previewCountbyType.get(dvObjectTypeLabel) == -1) { + String dvObjectType = null; + + if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("dataverses"))) { + dvObjectType = "dataverses"; + } else if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("datasets"))) { + dvObjectType = "datasets"; + } else if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("files"))) { + dvObjectType = "files"; + } + + if (dvObjectType != null) { + logger.info("adding object type to the second pass query: "+dvObjectType); + selectedTypesListSecondPass.add(dvObjectType); + } } } @@ -409,13 +423,15 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused if (solrQueryResponseSecondPass != null) { if (solrQueryResponseSecondPass.hasError()) { - logger.info(solrQueryResponse.getError()); + logger.info(solrQueryResponseSecondPass.getError()); setSolrErrorEncountered(true); } // And now populate the remaining type facets: for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + logger.info("facet category: "+facetCategory.getName()); for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + logger.info("facet label: "+facetLabel.getName()); previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index d3ff7e42d15..18cdbaa6994 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -423,6 +423,7 @@ public SolrQueryResponse search( int statusCode = queryResponse.getStatus(); logger.info("status code of the query response: "+statusCode); + ///logger.info("number of hits: "+queryResponse._size()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -823,6 +824,7 @@ public SolrQueryResponse search( facetCategory.setFacetLabel(facetLabelList); if (!facetLabelList.isEmpty()) { if (facetCategory.getName().equals(SearchFields.TYPE)) { + logger.info("type facet encountered"); // the "type" facet is special, these are not typeFacetCategories.add(facetCategory); } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { From f1e37ae0ff01e1fe0030202be1883f823bb8d080 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 17:26:54 -0400 Subject: [PATCH 056/414] finally working as it should; much simplified/way less expensive second pass query sent in order to populate the unchecked type count facets. 
(#9635) --- .../iq/dataverse/search/SearchIncludeFragment.java | 4 +--- .../iq/dataverse/search/SearchServiceBean.java | 13 +++++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 177186fce49..47a5621c3d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -418,7 +418,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, false, false); + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); if (solrQueryResponseSecondPass != null) { @@ -429,9 +429,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // And now populate the remaining type facets: for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { - logger.info("facet category: "+facetCategory.getName()); for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - logger.info("facet label: "+facetLabel.getName()); previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 18cdbaa6994..be3330080c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -165,7 +165,8 @@ public SolrQueryResponse search( List dataverses, String query, List filterQueries, - String sortField, String sortOrder, + String sortField, + String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, @@ -189,7 +190,11 @@ public SolrQueryResponse search( // SortClause foo = new SortClause("name", SolrQuery.ORDER.desc); // if (query.equals("*") || query.equals("*:*")) { // solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc)); - solrQuery.setSort(new SortClause(sortField, sortOrder)); + if (sortField != null) { + // is it ok not to specify any sort? - there are cases where we + // don't care, and it must cost some extra cycles -- L.A. 
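The second-pass call above now passes a null sortField, paginationStart 0 and numRows 1, so it only has to produce facet counts. As a rough standalone illustration (plain SolrJ, not code from this patch), the request it ends up building amounts to something like:

    SolrQuery facetOnly = new SolrQuery("*:*");
    facetOnly.setRows(1);                       // result documents are ignored, only counts are used
    facetOnly.addFacetField(SearchFields.TYPE); // assuming the type facet field, per SearchFields
    // no setSort() call: with sortField null, the guard below skips sorting entirely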
+ solrQuery.setSort(new SortClause(sortField, sortOrder)); + } // } else { // solrQuery.setSort(sortClause); // } @@ -423,7 +428,8 @@ public SolrQueryResponse search( int statusCode = queryResponse.getStatus(); logger.info("status code of the query response: "+statusCode); - ///logger.info("number of hits: "+queryResponse._size()); + logger.info("_size from query response: "+queryResponse._size()); + logger.info("qtime: "+queryResponse.getQTime()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -824,7 +830,6 @@ public SolrQueryResponse search( facetCategory.setFacetLabel(facetLabelList); if (!facetLabelList.isEmpty()) { if (facetCategory.getName().equals(SearchFields.TYPE)) { - logger.info("type facet encountered"); // the "type" facet is special, these are not typeFacetCategories.add(facetCategory); } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { From c1a19299e547fbc47322dafde74bc75d2e138d9c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 18 Oct 2023 13:48:47 -0400 Subject: [PATCH 057/414] a stub for intercepting a "circuit breaker" 503 from the server (#9635) --- .../dataverse/search/SearchServiceBean.java | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index be3330080c4..1b92c2a4a46 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -382,13 +382,35 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; + boolean solrTemporarilyUnavailable = false; + try { queryResponse = solrClientService.getSolrClient().query(solrQuery); } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); - logger.info("message from solr exception: "+messageFromSolr); + logger.info("message from the solr exception: "+messageFromSolr); + logger.info("code from the solr exception: "+ex.code()); + + if (queryResponse != null) { + logger.info("return code: "+queryResponse.getStatus()); + } + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error", as the code below implies - could be + // something else too - ? + + // Specifically, we now rely on the Solr "circuit breaker" mechanism + // to start dropping requests with 503, when the service is + // overwhelmed with request load (with the assumption that this is + // a transient condition): + + if (ex.code() == 503) { + solrTemporarilyUnavailable = true; + // actual logic for communicating this state back to the local + // client code TBD (@todo) + } String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; if (messageFromSolr.startsWith(stringToHide)) { @@ -403,12 +425,7 @@ public SolrQueryResponse search( exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error
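Once the @todo above is resolved, the transient 503 state could be surfaced to callers instead of the misleading "Search Syntax Error" text. A hypothetical sketch only: the bundle key is invented for illustration, and it assumes SolrQueryResponse can be constructed from the query, as the surrounding code suggests:

    if (solrTemporarilyUnavailable) {
        // report a temporary outage rather than blaming the user's query syntax
        SolrQueryResponse unavailableResponse = new SolrQueryResponse(solrQuery);
        unavailableResponse.setError(BundleUtil.getStringFromBundle("search.solr.temporarilyUnavailable"));
        return unavailableResponse;
    }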
- - - + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); From ecbb020ed7da390c378fb76f08c9c5fb72677189 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:18:12 -0400 Subject: [PATCH 058/414] add/standardize retrieveSizeFromMedia call --- .../iq/dataverse/dataaccess/FileAccessIO.java | 33 +- .../dataverse/dataaccess/InputStreamIO.java | 5 + .../dataaccess/RemoteOverlayAccessIO.java | 14 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 21 +- .../iq/dataverse/dataaccess/StorageIO.java | 379 +++++++++--------- .../dataverse/dataaccess/SwiftAccessIO.java | 5 + 6 files changed, 241 insertions(+), 216 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d95df1567bd..3e6c802c526 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -35,8 +35,6 @@ import java.util.List; import java.util.function.Predicate; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; // Dataverse imports: @@ -115,7 +113,7 @@ public void open (DataAccessOption... options) throws IOException { this.setInputStream(fin); setChannel(fin.getChannel()); - this.setSize(getLocalFileSize()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") @@ -506,21 +504,6 @@ public void delete() throws IOException { // Auxilary helper methods, filesystem access-specific: - private long getLocalFileSize () { - long fileSize = -1; - - try { - File testFile = getFileSystemPath().toFile(); - if (testFile != null) { - fileSize = testFile.length(); - } - return fileSize; - } catch (IOException ex) { - return -1; - } - - } - public FileInputStream openLocalFileAsInputStream () { FileInputStream in; @@ -742,4 +725,18 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE return toDelete; } + @Override + public long retrieveSizeFromMedia() { + long fileSize = -1; + try { + File testFile = getFileSystemPath().toFile(); + if (testFile != null) { + fileSize = testFile.length(); + } + return fileSize; + } catch (IOException ex) { + return -1; + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index be6f9df0254..de392b74cca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -165,4 +165,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } + @Override + public long retrieveSizeFromMedia() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index a9653f2ab68..9c1f5ba23aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -64,8 +64,6 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - - String globusAccessToken = null; protected StorageIO baseStore = null; protected String path = null; @@ -155,7 +153,7 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(retrieveSize()); + this.setSize(retrieveSizeFromMedia()); } if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { @@ -183,7 +181,8 @@ public void open(DataAccessOption... options) throws IOException { } } - long retrieveSize() { + @Override + public long retrieveSizeFromMedia() { long size = -1; HttpHead head = new HttpHead(baseUrl + "/" + path); try { @@ -383,7 +382,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (retrieveSize() != -1); + return (retrieveSizeFromMedia() != -1); } @Override @@ -502,8 +501,9 @@ protected void configureStores(DataAccessRequest req, String driverId, String st if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - // THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? storageLocation.substring(0, endOfId) : storageLocation; switch (baseDriverType) { case DataAccess.S3: diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 822ada0b83e..b0f9f0ffb05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -207,14 +207,7 @@ public void open(DataAccessOption... 
options) throws IOException { if (isReadAccess) { - key = getMainFileKey(); - ObjectMetadata objectMetadata = null; - try { - objectMetadata = s3.getObjectMetadata(bucketName, key); - } catch (SdkClientException sce) { - throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); - } - this.setSize(objectMetadata.getContentLength()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") @@ -1385,4 +1378,16 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + key = getMainFileKey(); + ObjectMetadata objectMetadata = null; + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + } catch (SdkClientException sce) { + throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")"); + } + return objectMetadata.getContentLength(); + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 3bc83538679..f3c2ef5f513 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -20,7 +20,6 @@ package edu.harvard.iq.dataverse.dataaccess; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; @@ -43,7 +42,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; - /** * * @author Leonid Andreev @@ -55,15 +53,15 @@ public abstract class StorageIO { public StorageIO() { } - + public StorageIO(String storageLocation, String driverId) { - this.driverId=driverId; + this.driverId = driverId; } public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.dvObject = dvObject; this.req = req; - this.driverId=driverId; + this.driverId = driverId; if (this.req == null) { this.req = new DataAccessRequest(); } @@ -72,18 +70,19 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { } } - - // Abstract methods to be implemented by the storage drivers: public abstract void open(DataAccessOption... option) throws IOException; protected boolean isReadAccess = false; protected boolean isWriteAccess = false; - //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes. - //Currently, this is just used to warn users at upload time rather than disable restriction/embargo. + // A public store is one in which files may be accessible outside Dataverse and + // therefore accessible without regard to Dataverse's access controls related to + // restriction and embargoes. + // Currently, this is just used to warn users at upload time rather than disable + // restriction/embargo. static protected Map driverPublicAccessMap = new HashMap(); - + public boolean canRead() { return isReadAccess; } @@ -94,115 +93,118 @@ public boolean canWrite() { public abstract String getStorageLocation() throws IOException; - // This method will return a Path, if the storage method is a - // local filesystem. Otherwise should throw an IOException. + // This method will return a Path, if the storage method is a + // local filesystem. Otherwise should throw an IOException. 
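Patch 058 above standardizes per-driver size lookups behind retrieveSizeFromMedia(). A hedged usage sketch (illustrative; it assumes DataAccess.getStorageIO resolves the configured driver for the file, as elsewhere in this series):

    StorageIO<DataFile> sio = DataAccess.getStorageIO(dataFile);
    long mediaSize = sio.retrieveSizeFromMedia(); // asks the backing store, not the database
    if (dataFile.getFilesize() != mediaSize) {
        // the database value is stale, or a direct/out-of-band upload has not finished yet
    }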
public abstract Path getFileSystemPath() throws IOException; - - public abstract boolean exists() throws IOException; - + + public abstract boolean exists() throws IOException; + public abstract void delete() throws IOException; - + // this method for copies a local Path (for ex., a // temp file, into this DataAccess location): public abstract void savePath(Path fileSystemPath) throws IOException; - + // same, for an InputStream: /** - * This method copies a local InputStream into this DataAccess location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess location. Note + * that the S3 driver implementation of this abstract method is problematic, + * because S3 cannot save an object of an unknown length. This effectively + * nullifies any benefits of streaming; as we cannot start saving until we have + * read the entire stream. One way of solving this would be to buffer the entire + * stream as byte[], in memory, then save it... Which of course would be limited + * by the amount of memory available, and thus would not work for streams larger + * than that. So we have eventually decided to save the stream to a temp + * file, then save to S3. This is slower, but guaranteed to work on any size + * stream. An alternative we may want to consider is to not implement this + * method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ + */ public abstract void saveInputStream(InputStream inputStream) throws IOException; + public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException; - + // Auxiliary File Management: (new as of 4.0.2!) - + // An "auxiliary object" is an abstraction of the traditional DVN/Dataverse - // mechanism of storing extra files related to the man StudyFile/DataFile - - // such as "saved original" and cached format conversions for tabular files, - // thumbnails for images, etc. - in physical files with the same file - // name but various reserved extensions.
- - //This function retrieves auxiliary files related to datasets, and returns them as inputstream - public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException ; - public abstract Channel openAuxChannel(String auxItemTag, DataAccessOption... option) throws IOException; - - public abstract long getAuxObjectSize(String auxItemTag) throws IOException; - - public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; - - public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; - - public abstract void backupAsAux(String auxItemTag) throws IOException; - - public abstract void revertBackupAsAux(String auxItemTag) throws IOException; - - // this method copies a local filesystem Path into this DataAccess Auxiliary location: + // mechanism of storing extra files related to the main StudyFile/DataFile - + // such as "saved original" and cached format conversions for tabular files, + // thumbnails for images, etc. - in physical files with the same file + // name but various reserved extensions. + + // This function retrieves auxiliary files related to datasets, and returns them + // as inputstream + public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException; + public abstract Channel openAuxChannel(String auxItemTag, DataAccessOption... option) throws IOException; + + public abstract long getAuxObjectSize(String auxItemTag) throws IOException; + + public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; + + public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; + + public abstract void backupAsAux(String auxItemTag) throws IOException; + + public abstract void revertBackupAsAux(String auxItemTag) throws IOException; + + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException; - + /** - * This method copies a local InputStream into this DataAccess Auxiliary location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess Auxiliary + * location. Note that the S3 driver implementation of this abstract method is + * problematic, because S3 cannot save an object of an unknown length. This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that.
So we have eventually decided to save + * the stream to a temp file, then save to S3. This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException; - - public abstract List listAuxObjects() throws IOException; - - public abstract void deleteAuxObject(String auxItemTag) throws IOException; - + */ + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; + + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) + throws IOException; + + public abstract List listAuxObjects() throws IOException; + + public abstract void deleteAuxObject(String auxItemTag) throws IOException; + public abstract void deleteAllAuxObjects() throws IOException; private DataAccessRequest req; private InputStream in = null; - private OutputStream out; + private OutputStream out; protected Channel channel; protected DvObject dvObject; protected String driverId; - /*private int status;*/ + /* private int status; */ private long size; /** - * Where in the file to seek to when reading (default is zero bytes, the - * start of the file). + * Where in the file to seek to when reading (default is zero bytes, the start + * of the file).
*/ private long offset; - + private String mimeType; private String fileName; private String varHeader; @@ -215,8 +217,8 @@ public boolean canWrite() { private String swiftContainerName; private boolean isLocalFile = false; - /*private boolean isRemoteAccess = false;*/ - /*private boolean isHttpAccess = false;*/ + /* private boolean isRemoteAccess = false; */ + /* private boolean isHttpAccess = false; */ private boolean noVarHeader = false; // For remote downloads: @@ -229,13 +231,14 @@ public boolean canWrite() { private String remoteUrl; protected String remoteStoreName = null; protected URL remoteStoreUrl = null; - + // For HTTP-based downloads: - /*private GetMethod method = null; - private Header[] responseHeaders;*/ + /* + * private GetMethod method = null; private Header[] responseHeaders; + */ // getters: - + public Channel getChannel() throws IOException { return channel; } @@ -255,16 +258,15 @@ public ReadableByteChannel getReadChannel() throws IOException { return (ReadableByteChannel) channel; } - - public DvObject getDvObject() - { + + public DvObject getDvObject() { return dvObject; } - + public DataFile getDataFile() { return (DataFile) dvObject; } - + public Dataset getDataset() { return (Dataset) dvObject; } @@ -277,9 +279,9 @@ public DataAccessRequest getRequest() { return req; } - /*public int getStatus() { - return status; - }*/ + /* + * public int getStatus() { return status; } + */ public long getSize() { return size; @@ -292,9 +294,9 @@ public long getOffset() { public InputStream getInputStream() throws IOException { return in; } - + public OutputStream getOutputStream() throws IOException { - return out; + return out; } public String getMimeType() { @@ -317,23 +319,23 @@ public String getRemoteUrl() { return remoteUrl; } - public String getTemporarySwiftUrl(){ + public String getTemporarySwiftUrl() { return temporarySwiftUrl; } - + public String getTempUrlExpiry() { return tempUrlExpiry; } - + public String getTempUrlSignature() { return tempUrlSignature; } - + public String getSwiftFileName() { return swiftFileName; } - public String getSwiftContainerName(){ + public String getSwiftContainerName() { return swiftContainerName; } @@ -344,34 +346,32 @@ public String getRemoteStoreName() { public URL getRemoteStoreUrl() { return remoteStoreUrl; } - - /*public GetMethod getHTTPMethod() { - return method; - } - public Header[] getResponseHeaders() { - return responseHeaders; - }*/ + /* + * public GetMethod getHTTPMethod() { return method; } + * + * public Header[] getResponseHeaders() { return responseHeaders; } + */ public boolean isLocalFile() { return isLocalFile; } - - // "Direct Access" StorageIO is used to access a physical storage - // location not associated with any dvObject. (For example, when we - // are deleting a physical file left behind by a DataFile that's - // already been deleted from the database). + + // "Direct Access" StorageIO is used to access a physical storage + // location not associated with any dvObject. (For example, when we + // are deleting a physical file left behind by a DataFile that's + // already been deleted from the database). 
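As the comment above describes, direct access is the mode used for orphaned physical files. A hedged example (illustrative; the storage location string follows the pattern exercised in the DataAccessTest cases further down):

    // delete a stray physical file whose DataFile row is already gone
    StorageIO<DvObject> directIo = DataAccess.getDirectStorageIO("file://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece");
    if (directIo.isDirectAccess()) {
        directIo.delete(); // permanent deletion is restricted to direct-access StorageIO
    }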
public boolean isDirectAccess() { - return dvObject == null; + return dvObject == null; } - /*public boolean isRemoteAccess() { - return isRemoteAccess; - }*/ + /* + * public boolean isRemoteAccess() { return isRemoteAccess; } + */ - /*public boolean isHttpAccess() { - return isHttpAccess; - }*/ + /* + * public boolean isHttpAccess() { return isHttpAccess; } + */ public boolean isDownloadSupported() { return isDownloadSupported; @@ -398,9 +398,9 @@ public void setRequest(DataAccessRequest dar) { req = dar; } - /*public void setStatus(int s) { - status = s; - }*/ + /* + * public void setStatus(int s) { status = s; } + */ public void setSize(long s) { size = s; @@ -421,11 +421,11 @@ public void setOffset(long offset) throws IOException { public void setInputStream(InputStream is) { in = is; } - + public void setOutputStream(OutputStream os) { - out = os; - } - + out = os; + } + public void setChannel(Channel c) { channel = c; } @@ -450,45 +450,46 @@ public void setRemoteUrl(String u) { remoteUrl = u; } - public void setTemporarySwiftUrl(String u){ + public void setTemporarySwiftUrl(String u) { temporarySwiftUrl = u; } - - public void setTempUrlExpiry(Long u){ + + public void setTempUrlExpiry(Long u) { tempUrlExpiry = String.valueOf(u); } - + public void setSwiftFileName(String u) { swiftFileName = u; } - - public void setTempUrlSignature(String u){ + + public void setTempUrlSignature(String u) { tempUrlSignature = u; } - public void setSwiftContainerName(String u){ + public void setSwiftContainerName(String u) { swiftContainerName = u; } - /*public void setHTTPMethod(GetMethod hm) { - method = hm; - }*/ + /* + * public void setHTTPMethod(GetMethod hm) { method = hm; } + */ - /*public void setResponseHeaders(Header[] headers) { - responseHeaders = headers; - }*/ + /* + * public void setResponseHeaders(Header[] headers) { responseHeaders = headers; + * } + */ public void setIsLocalFile(boolean f) { isLocalFile = f; } - /*public void setIsRemoteAccess(boolean r) { - isRemoteAccess = r; - }*/ + /* + * public void setIsRemoteAccess(boolean r) { isRemoteAccess = r; } + */ - /*public void setIsHttpAccess(boolean h) { - isHttpAccess = h; - }*/ + /* + * public void setIsHttpAccess(boolean h) { isHttpAccess = h; } + */ public void setIsDownloadSupported(boolean d) { isDownloadSupported = d; @@ -506,12 +507,11 @@ public void setNoVarHeader(boolean nvh) { noVarHeader = nvh; } - // connection management methods: - /*public void releaseConnection() { - if (method != null) { - method.releaseConnection(); - } - }*/ + // connection management methods: + /* + * public void releaseConnection() { if (method != null) { + * method.releaseConnection(); } } + */ public void closeInputStream() { if (in != null) { @@ -528,7 +528,7 @@ public void closeInputStream() { } } } - + public String generateVariableHeader(List dvs) { String varHeader = null; @@ -571,14 +571,14 @@ protected boolean isWriteAccessRequested(DataAccessOption... options) throws IOE return false; } - public boolean isBelowIngestSizeLimit() { - long limit = Long.parseLong(System.getProperty("dataverse.files." + this.driverId + ".ingestsizelimit", "-1")); - if(limit>0 && getSize()>limit) { - return false; - } else { - return true; - } - } + public boolean isBelowIngestSizeLimit() { + long limit = Long.parseLong(System.getProperty("dataverse.files." 
+ this.driverId + ".ingestsizelimit", "-1")); + if (limit > 0 && getSize() > limit) { + return false; + } else { + return true; + } + } public boolean downloadRedirectEnabled() { return false; @@ -587,36 +587,38 @@ public boolean downloadRedirectEnabled() { public boolean downloadRedirectEnabled(String auxObjectTag) { return false; } - - public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { + + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } - public static boolean isPublicStore(String driverId) { - //Read once and cache - if(!driverPublicAccessMap.containsKey(driverId)) { - driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); + // Read once and cache + if (!driverPublicAccessMap.containsKey(driverId)) { + driverPublicAccessMap.put(driverId, + Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); } return driverPublicAccessMap.get(driverId); } - + public static String getDriverPrefix(String driverId) { - return driverId+ DataAccess.SEPARATOR; + return driverId + DataAccess.SEPARATOR; } - + public static boolean isDirectUploadEnabled(String driverId) { - return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) || - Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); + return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) + && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) + || Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); } - - //Check that storageIdentifier is consistent with store's config - //False will prevent direct uploads + + // Check that storageIdentifier is consistent with store's config + // False will prevent direct uploads static boolean isValidIdentifier(String driverId, String storageId) { return false; } - - //Utility to verify the standard UUID pattern for stored files. + + // Utility to verify the standard UUID pattern for stored files. protected static boolean usesStandardNamePattern(String identifier) { Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$"); @@ -626,4 +628,15 @@ protected static boolean usesStandardNamePattern(String identifier) { public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; + /** + * A storage-type-specific mechanism for retrieving the size of a file. Intended + * primarily as a way to get the size before it has been recorded in the + * database, e.g. during direct/out-of-band transfers but could be useful to + * check the db values. 
+ * + * @return file size in bytes + * @throws IOException + */ + public abstract long retrieveSizeFromMedia() throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 6c84009de3e..0d1dab581fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -954,4 +954,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + throw new UnsupportedDataAccessOperationException("SwiftAccessIO: this method is not supported in this DataAccess driver."); + } } From 68ab3f3cb6399d4c73bff0bcc84d9687ab369351 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:18:58 -0400 Subject: [PATCH 059/414] typos, change hash notice --- .../iq/dataverse/globus/GlobusServiceBean.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 49572519696..8aa9915db58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -4,8 +4,6 @@ import com.github.benmanes.caffeine.cache.Caffeine; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; -import com.nimbusds.oauth2.sdk.pkce.CodeVerifier; - import edu.harvard.iq.dataverse.*; import jakarta.ejb.Asynchronous; @@ -21,7 +19,6 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; -import jakarta.json.JsonValue; import jakarta.servlet.http.HttpServletRequest; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -662,6 +659,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S GlobusEndpoint endpoint = getGlobusEndpoint(dataset); ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + if(ruleId!=null) { Long datasetId = rulesCache.getIfPresent(ruleId); if(datasetId!=null) { @@ -1095,7 +1093,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; - // ToDo: what if the file doesnot exists in s3 + // ToDo: what if the file does not exist in s3 // ToDo: what if checksum calculation failed do { @@ -1107,8 +1105,8 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } catch (IOException ioex) { count = 3; logger.info(ioex.getMessage()); - globusLogger.info("DataFile (fullPAth " + fullPath - + ") does not appear to be accessible withing Dataverse: "); + globusLogger.info("DataFile (fullPath " + fullPath + + ") does not appear to be accessible within Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); @@ -1119,7 +1117,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } while (count < 3); if (checksumVal.length() == 0) { - checksumVal = "NULL"; + checksumVal = "Not available in Dataverse"; } String mimeType = calculatemime(fileName); @@ -1384,4 +1382,5 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { private static boolean isDataverseManaged(String driverId) { return Boolean.getBoolean("dataverse.files."
+ driverId + ".managed"); } + } From d57b9f048490bcc2a38d8c2fc422e3797bad2fbc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:19:24 -0400 Subject: [PATCH 060/414] add getLocationFromStorage, add tests --- .../iq/dataverse/dataaccess/DataAccess.java | 34 +++++++++++++++---- .../dataverse/dataaccess/DataAccessTest.java | 20 +++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 8387f8110cf..a3345cb7a8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -153,12 +153,34 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { } public static String getStorageIdFromLocation(String location) { - if(location.contains(SEPARATOR)) { - //It's a full location with a driverId, so strip and reapply the driver id - //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it - return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1); - } - return location.substring(location.lastIndexOf('/')+1); + if (location.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return location.substring(0, location.indexOf(SEPARATOR) + 3) + + location.substring(location.lastIndexOf('/') + 1); + } + return location.substring(location.lastIndexOf('/') + 1); + } + + /** Changes storageidentifiers of the form + * s3://bucketname/18b39722140-50eb7d3c5ece or file://18b39722140-50eb7d3c5ece to s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * and + * 18b39722140-50eb7d3c5ece to 10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * @param id + * @param dataset + * @return + */ + public static String getLocationFromStorageId(String id, Dataset dataset) { + String path= dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + if (id.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return id.substring(0, id.indexOf(SEPARATOR) + 3) + path + + id.substring(id.lastIndexOf('/') + 1); + } + return path + id.substring(id.lastIndexOf('/') + 1); } public static String getDriverType(String driverId) { diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java index 1ff914adff9..f7ce061fb24 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java @@ -59,4 +59,24 @@ void testCreateNewStorageIO_createsFileAccessIObyDefault() throws IOException { StorageIO storageIo = DataAccess.createNewStorageIO(dataset, "valid-tag"); assertTrue(storageIo.getClass().equals(FileAccessIO.class)); } + + @Test + void testGetLocationFromStorageId() { + Dataset d = new Dataset(); + d.setAuthority("10.5072"); + d.setIdentifier("FK2/ABCDEF"); + assertEquals("s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("s3://18b39722140-50eb7d3c5ece", 
d)); + assertEquals("10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("18b39722140-50eb7d3c5ece", d)); + + } + + @Test + void testGetStorageIdFromLocation() { + assertEquals("file://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("file://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + assertEquals("s3://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("s3://bucketname:10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + } } From 34286830d1cfa4849a82909eaff20528980fd717 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:19:53 -0400 Subject: [PATCH 061/414] get size for direct uploads --- .../impl/CreateNewDataFilesCommand.java | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index ac701da1be9..a8be1bd5116 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -3,18 +3,20 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -83,7 +85,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand sio; + try { + sio = DataAccess.getDirectStorageIO(DataAccess.getLocationFromStorageId(newStorageIdentifier, version.getDataset())); + + // get file size + // Note - some stores (e.g. AWS S3) only offer eventual consistency and a call + // to get the size immediately after uploading may fail. As of the addition of + // PR#9409 adding storage quotas, we are now requiring size to be available + // earlier. 
If this is seen, adding + // a delay/retry may help + newFileSize = sio.retrieveSizeFromMedia(); + } catch (IOException e) { + // If we don't get a file size, a CommandExecutionException will be thrown later in the code + e.printStackTrace(); + } + } } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just From 2adfa8af01124c31ada3f1801dd5f3dac0fd704e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:20:31 -0400 Subject: [PATCH 062/414] refactor, add delete method, etc. --- .../dataaccess/GlobusOverlayAccessIO.java | 157 ++++++++++++------ 1 file changed, 110 insertions(+), 47 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 965dc3c0947..011bb74f720 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -17,11 +17,14 @@ import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; import org.apache.http.util.EntityUtils; +import jakarta.json.Json; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; /** * @author qqmyers @@ -43,7 +46,6 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - String globusAccessToken = null; /* * If this is set to true, the store supports Globus transfer in and * Dataverse/the globus app manage file locations, access controls, deletion, @@ -51,35 +53,64 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce */ private boolean dataverseManaged = false; + private String relativeDirectoryPath; + + private String endpointPath; + + private String filename; + + private String endpoint; + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - if (dvObject instanceof DataFile) { - globusAccessToken = retrieveGlobusAccessToken(); - } dataverseManaged = isDataverseManaged(this.driverId); + } + + private void parsePath() { + int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.info("endpointWithBasePath: " + endpointWithBasePath); + endpointPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); + logger.info("endpointPath: " + endpointPath); + + if (dataverseManaged && (dvObject!=null)) { + + Dataset ds = null; + if (dvObject instanceof Dataset) { + ds = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + ds = ((DataFile) dvObject).getOwner(); + } + relativeDirectoryPath = "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + } else { + relativeDirectoryPath = ""; + } + if (filenameStart > 0) { + relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); + } + logger.info("relativeDirectoryPath finally: " + relativeDirectoryPath); + filename = path.substring(filenameStart); + endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; - logger.info("GAT3: " + globusAccessToken); + } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { this.driverId = driverId; + configureStores(null, driverId, storageLocation); this.dataverseManaged = isDataverseManaged(this.driverId); if (dataverseManaged) { String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); path = parts[1]; } else { this.setIsLocalFile(false); - configureStores(null, driverId, storageLocation); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); validatePath(path); logger.fine("Relative path: " + path); } -//ToDo - only when needed? - globusAccessToken = retrieveGlobusAccessToken(); - } - + private String retrieveGlobusAccessToken() { // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); @@ -101,33 +132,16 @@ private void validatePath(String relPath) throws IOException { // Call the Globus API to get the file size @Override - long retrieveSize() { + public long retrieveSizeFromMedia() { + parsePath(); + String globusAccessToken = retrieveGlobusAccessToken(); logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { - int filenameStart = path.lastIndexOf("/") + 1; - String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); - int pathStart = endpointWithBasePath.indexOf("/"); - logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); - logger.info("directoryPath: " + directoryPath); - - if (dataverseManaged && (dvObject!=null)) { - Dataset ds = ((DataFile) dvObject).getOwner(); - directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); - logger.info("directoryPath now: " + directoryPath); - - } - if (filenameStart > 0) { - directoryPath = directoryPath + path.substring(0, filenameStart); - } - logger.info("directoryPath finally: " + directoryPath); - String filename = path.substring(filenameStart); - String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint - + "/ls?path=" + directoryPath + "&filter=name:" + filename); + + "/ls?path=" + endpointPath + relativeDirectoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); logger.info("Token is " + globusAccessToken); @@ -166,25 +180,63 @@ public InputStream getInputStream() throws IOException { @Override public void delete() throws IOException { -// Fix - // Delete is best-effort - we tell the remote server and it may or may not - // implement this call + parsePath(); + // Delete is best-effort - we tell the endpoint to delete don't monitor whether + // it succeeds if (!isDirectAccess()) { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } + String globusAccessToken = retrieveGlobusAccessToken(); + // Construct Globus URL + URI absoluteURI = null; try { - HttpDelete del = new HttpDelete(baseUrl + "/" + path); - CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); - try { - int code = response.getStatusLine().getStatusCode(); - switch (code) { + + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/submission_id"); + HttpGet get = new HttpGet(absoluteURI); + + logger.info("Token is " + globusAccessToken); + get.addHeader("Authorization", "Bearer " + globusAccessToken); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + if (response.getStatusLine().getStatusCode() == 200) { + // Get reponse as string + String responseString = EntityUtils.toString(response.getEntity()); + logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + JsonObject responseJson = JsonUtil.getJsonObject(responseString); + String submissionId = responseJson.getString("value"); + logger.info("submission_id for delete is: " + submissionId); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/delete"); + HttpPost post = new HttpPost(absoluteURI); + JsonObjectBuilder taskJsonBuilder = Json.createObjectBuilder(); + taskJsonBuilder.add("submission_id", submissionId).add("DATA_TYPE", "delete").add("endpoint", endpoint) + .add("DATA", Json.createArrayBuilder().add(Json.createObjectBuilder().add("DATA_TYPE", "delete_item").add("path", + endpointPath + relativeDirectoryPath + "/" + filename))); + post.setHeader("Content-Type", "application/json"); + post.addHeader("Authorization", "Bearer " + globusAccessToken); + String taskJson= JsonUtil.prettyPrint(taskJsonBuilder.build()); + logger.info("Sending: " + taskJson); + post.setEntity(new StringEntity(taskJson, "utf-8")); + CloseableHttpResponse postResponse = getSharedHttpClient().execute(post, localContext); + int statusCode=postResponse.getStatusLine().getStatusCode(); + logger.info("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); + switch (statusCode) { + case 202: + // ~Success - delete task was accepted + logger.info("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); + break; case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + path); + // Duplicate - delete task was already accepted + logger.info("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); + break; default: - logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + logger.warning("Response from " + post.getURI().toString() + " was " + + 
postResponse.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(postResponse.getEntity())); } - } finally { - EntityUtils.consume(response.getEntity()); + + } else { + logger.warning("Response from " + get.getURI().toString() + " was " + + response.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(response.getEntity())); } } catch (Exception e) { logger.warning(e.getMessage()); @@ -250,6 +302,16 @@ static boolean isValidIdentifier(String driverId, String storageId) { return true; } + @Override + public String getStorageLocation() throws IOException { + parsePath(); + if (dataverseManaged) { + return this.driverId + DataAccess.SEPARATOR + relativeDirectoryPath + "/" + filename; + } else { + return super.getStorageLocation(); + } + } + public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { @@ -272,7 +334,7 @@ public static void main(String[] args) { try { GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( "globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.retrieveSize()); + logger.info("Size is " + gsio.retrieveSizeFromMedia()); } catch (IOException e) { // TODO Auto-generated catch block @@ -286,7 +348,7 @@ public static void main(String[] args) { df.setOwner(ds); df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); - logger.info("Size2 is " + gsio.retrieveSize()); + logger.info("Size2 is " + gsio.retrieveSizeFromMedia()); } catch (IOException e) { // TODO Auto-generated catch block @@ -294,4 +356,5 @@ public static void main(String[] args) { } } + } From bdba5d8ef8a459314d5b8dccab30190461bbfdea Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 14:03:46 -0400 Subject: [PATCH 063/414] implement signedUrls for globus app, refactor --- .../edu/harvard/iq/dataverse/api/Admin.java | 8 +- .../harvard/iq/dataverse/api/Datasets.java | 75 ++++++++++++++++++- .../edu/harvard/iq/dataverse/api/Files.java | 6 +- .../externaltools/ExternalToolHandler.java | 71 +----------------- .../dataverse/globus/GlobusServiceBean.java | 58 +++++++------- .../iq/dataverse/util/URLTokenUtil.java | 65 ++++++++++++++++ src/main/java/propertyFiles/Bundle.properties | 4 +- .../ExternalToolHandlerTest.java | 11 +-- .../ExternalToolServiceBeanTest.java | 4 +- 9 files changed, 192 insertions(+), 110 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..1870c7cb508 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -96,7 +96,6 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteTemplateCommand; import edu.harvard.iq.dataverse.engine.command.impl.RegisterDvObjectCommand; -import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.userdata.UserListMaker; @@ -105,6 +104,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; @@ -2418,12 +2418,12 @@ public Response 
getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } String baseUrl = urlInfo.getString("url"); - int timeout = urlInfo.getInt(ExternalToolHandler.TIMEOUT, 10); - String method = urlInfo.getString(ExternalToolHandler.HTTP_METHOD, "GET"); + int timeout = urlInfo.getInt(URLTokenUtil.TIMEOUT, 10); + String method = urlInfo.getString(URLTokenUtil.HTTP_METHOD, "GET"); String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key); - return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); + return ok(Json.createObjectBuilder().add(URLTokenUtil.SIGNED_URL, signedUrl)); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d3ea1b80696..aad5a95bd8e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -90,6 +90,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.MarkupChecker; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; @@ -3328,7 +3329,7 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" @POST @AuthRequired - @Path("{id}/addglobusFiles") + @Path("{id}/addGlobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @@ -3411,6 +3412,74 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } + /** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer. This api endpoint is expected to be called as a signed callback + * after the globus-dataverse app/other app is launched, but it will accept + * other forms of authentication. 
+ * + * @param crc + * @param datasetId + */ + @GET + @AuthRequired + @Path("{id}/globusUploadParameters") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @QueryParam(value = "locale") String locale) + { + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + String storeId = dataset.getEffectiveStorageDriverId(); + if(!DataAccess.getDriverType(storeId).equals(DataAccess.GLOBUS)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + boolean managed = GlobusOverlayAccessIO.isDataverseManaged(storeId); + + JsonObjectBuilder queryParams = Json.createObjectBuilder(); + queryParams.add("queryParameters", + Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}")) + .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}")) + .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}")) + .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}")) + .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}").add("managed", managed))); + + JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusTransferPaths") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusTransferPaths") + .add(URLTokenUtil.TIMEOUT, 300)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles") + .add(URLTokenUtil.TIMEOUT, 300)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing") + .add(URLTokenUtil.HTTP_METHOD, "GET") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files") + .add(URLTokenUtil.TIMEOUT, 300)); + + + URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale); + return ok(tokenUtil.createPostBody(tokenUtil.getParams(queryParams.build()), allowedApiCalls.build())); + } + /** Requests permissions for a given globus user to upload to the dataset * * @param crc @@ -3915,8 +3984,8 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc, } - ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + URLTokenUtil eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } catch (WrappedResponse wr) { return wr.getResponse(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 82811162d52..4c2fa8f68ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -48,6 +48,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -822,10 +824,10 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P return error(BAD_REQUEST, "FileMetadata not found."); } - ExternalToolHandler eth = null; + URLTokenUtil eth = null; eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index de4317464e6..36227c2f883 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -22,12 +22,8 @@ import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonNumber; import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.ws.rs.HttpMethod; @@ -41,15 +37,10 @@ */ public class ExternalToolHandler extends URLTokenUtil { - private final ExternalTool externalTool; + public final ExternalTool externalTool; private String requestMethod; - - public static final String HTTP_METHOD="httpMethod"; - public static final String TIMEOUT="timeOut"; - public static final String SIGNED_URL="signedUrl"; - public static final String NAME="name"; - public static final String URL_TEMPLATE="urlTemplate"; + /** @@ -134,10 +125,10 @@ public String handleRequest(boolean preview) { } else { // ToDo - if the allowedApiCalls() are defined, could/should we send them to - // tools using GET as well? + // tools using POST as well? if (requestMethod.equals(HttpMethod.POST)) { - String body = JsonUtil.prettyPrint(createPostBody(params).build()); + String body = JsonUtil.prettyPrint(createPostBody(params, null).build()); try { logger.info("POST Body: " + body); return postFormData(body); @@ -149,60 +140,6 @@ public String handleRequest(boolean preview) { return null; } - public JsonObject getParams(JsonObject toolParameters) { - //ToDo - why an array of object each with a single key/value pair instead of one object? 
- JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); - - // ToDo return json and print later - JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); - if (!(queryParams == null) && !queryParams.isEmpty()) { - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - JsonValue param = getParam(value); - if (param != null) { - paramsBuilder.add(key, param); - } - }); - }); - } - return paramsBuilder.build(); - } - - public JsonObjectBuilder createPostBody(JsonObject params) { - JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); - bodyBuilder.add("queryParameters", params); - String apiCallStr = externalTool.getAllowedApiCalls(); - if (apiCallStr != null && !apiCallStr.isBlank()) { - JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()); - JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); - apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - logger.fine(JsonUtil.prettyPrint(apiObj)); - String name = apiObj.getJsonString(NAME).getString(); - String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); - int timeout = apiObj.getInt(TIMEOUT); - String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); - logger.fine("URL Template: " + urlTemplate); - urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; - String apiPath = replaceTokensWithValues(urlTemplate); - logger.fine("URL WithTokens: " + apiPath); - String url = apiPath; - // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) - ApiToken apiToken = getApiToken(); - if (apiToken != null) { - url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), - httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") - + getApiToken().getTokenString()); - } - logger.fine("Signed URL: " + url); - apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) - .add(SIGNED_URL, url).add(TIMEOUT, timeout)); - })); - bodyBuilder.add("signedUrls", apisBuilder); - } - return bodyBuilder; - } - private String postFormData(String allowedApis) throws IOException, InterruptedException { String url = null; HttpClient client = HttpClient.newHttpClient(); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 8aa9915db58..2c0edd070f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -20,6 +20,7 @@ import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; import jakarta.servlet.http.HttpServletRequest; +import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -45,6 +46,8 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.apache.commons.codec.binary.StringUtils; + import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -58,6 +61,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import 
edu.harvard.iq.dataverse.util.json.JsonUtil; @Stateless @@ -120,7 +124,6 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); - ArrayList ids = new ArrayList(); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); @@ -282,7 +285,7 @@ private String getUniqueFilePath(GlobusEndpoint endpoint) { //Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) - .removalListener((ruleId, datasetId, cause) -> { + .evictionListener((ruleId, datasetId, cause) -> { //Delete rules that expire Dataset dataset = datasetSvc.find(datasetId); deletePermission((String) ruleId, dataset, null); @@ -575,12 +578,23 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) } catch (Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } - //Use URLTokenUtil for params currently in common with external tools. + // Use URLTokenUtil for params currently in common with external tools. URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); String appUrl; if (upload) { appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + String callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId() + + "/globusUploadParameters?locale=" + localeCode; + if (apiToken != null) { + callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(), + HttpMethod.GET, + JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + apiToken.getTokenString()); + } else { + // Shouldn't happen + logger.warning("unable to get api token for user: " + user.getIdentifier()); + } + appUrl = appUrl + "&callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback)); } else { if (df == null) { appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") @@ -637,39 +651,27 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S globusLogger = logger; } - globusLogger.info("Starting an globusUpload "); + Thread.sleep(5000); - - // ToDo - use DataAccess methods? 
- //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); - logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = null; - - Thread.sleep(5000); - // globus task status check GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); + globusLogger.info("Starting an globusUpload "); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - - ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); - - if(ruleId!=null) { + String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + logger.info("Found rule: " + ruleId); + if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); - if(datasetId!=null) { - - //Will delete rule - rulesCache.invalidate(ruleId); - } else { - //The cache already expired this rule, in which case it's delay not long enough, or we have some other problem - logger.warning("Rule " + ruleId + " not found in rulesCache"); - deletePermission(ruleId, dataset, globusLogger); + if (datasetId != null) { + + // Will delete rule + rulesCache.invalidate(ruleId); } } @@ -836,6 +838,10 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); } } + if (ruleId != null) { + deletePermission(ruleId, dataset, globusLogger); + globusLogger.info("Removed upload permission: " + ruleId); + } } public String addFilesAsync(String curlCommand, Logger globusLogger) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 4ae76a7b8db..216237105aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -6,6 +6,10 @@ import java.util.regex.Pattern; import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; import edu.harvard.iq.dataverse.DataFile; @@ -13,6 +17,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.json.JsonUtil; public class URLTokenUtil { @@ -22,6 +28,13 @@ public class URLTokenUtil { protected final FileMetadata fileMetadata; protected ApiToken apiToken; protected String localeCode; + + + public static final String HTTP_METHOD="httpMethod"; + public static final String TIMEOUT="timeOut"; + public static final String SIGNED_URL="signedUrl"; + public static final String NAME="name"; + public static final String URL_TEMPLATE="urlTemplate"; /** * File level @@ -193,6 +206,58 @@ private String getTokenValue(String value) { throw new IllegalArgumentException("Cannot replace reserved word: " + value); } + public JsonObjectBuilder createPostBody(JsonObject params, JsonArray allowedApiCalls) { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + bodyBuilder.add("queryParameters", params); + if (allowedApiCalls != null && !allowedApiCalls.isEmpty()) { + JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); + allowedApiCalls.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + 
logger.fine(JsonUtil.prettyPrint(apiObj)); + String name = apiObj.getJsonString(NAME).getString(); + String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); + int timeout = apiObj.getInt(TIMEOUT); + String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); + logger.fine("URL Template: " + urlTemplate); + urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = apiPath; + // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) + ApiToken apiToken = getApiToken(); + if (apiToken != null) { + url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), + httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + + getApiToken().getTokenString()); + } + logger.fine("Signed URL: " + url); + apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) + .add(SIGNED_URL, url).add(TIMEOUT, timeout)); + })); + bodyBuilder.add("signedUrls", apisBuilder); + } + return bodyBuilder; + } + + public JsonObject getParams(JsonObject toolParameters) { + //ToDo - why an array of object each with a single key/value pair instead of one object? + JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); + + // ToDo return json and print later + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + if (!(queryParams == null) && !queryParams.isEmpty()) { + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + JsonValue param = getParam(value); + if (param != null) { + paramsBuilder.add(key, param); + } + }); + }); + } + return paramsBuilder.build(); + } + public static String getScriptForUrl(String url) { String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); String script = "const newWin = window.open('" + url + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 3497b23eb94..88f819b417b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2645,8 +2645,8 @@ datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymi datasets.api.creationdate=Date Created datasets.api.modificationdate=Last Modified Date datasets.api.curationstatus=Curation Status -datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this installation of Dataverse. -datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this installation of Dataverse. +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this dataset. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this dataset. 
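For orientation, the POST body assembled by the createPostBody(params, allowedApiCalls) method above pairs the substituted queryParameters with a signedUrls array, one entry per allowed API call, using the constants defined earlier (name, httpMethod, signedUrl, timeOut). Below is a minimal consumer-side sketch of how a tool such as the globus app might pick out one of those URLs; it is illustrative, not part of the patch, and assumes only that payload shape (the class and method names here are hypothetical):

    // Illustrative sketch (not from the patch): find the pre-signed URL for one
    // allowed call, e.g. "addGlobusFiles", in a createPostBody(...) payload.
    // Field names come from the URLTokenUtil constants; everything else is assumed.
    import java.io.StringReader;
    import jakarta.json.Json;
    import jakarta.json.JsonObject;

    public class SignedUrlLookupSketch {
        public static String findSignedUrl(String postBody, String callName) {
            JsonObject body = Json.createReader(new StringReader(postBody)).readObject();
            if (!body.containsKey("signedUrls")) {
                return null; // no allowedApiCalls were granted to this tool
            }
            for (JsonObject api : body.getJsonArray("signedUrls").getValuesAs(JsonObject.class)) {
                if (callName.equals(api.getString("name"))) {
                    return api.getString("signedUrl"); // usable until the "timeOut" (minutes) passes
                }
            }
            return null;
        }
    }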
diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java index 21bb6633204..6f0132e2bc9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; @@ -53,7 +54,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { Exception expectedException1 = null; String nullLocaleCode = null; try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -71,7 +72,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { DataFile dataFile = new DataFile(); dataFile.setId(42l); try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -92,7 +93,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { .build().toString()); Exception expectedException2 = null; try { - ExternalToolHandler externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException2 = ex; } @@ -225,10 +226,10 @@ public void testGetToolUrlWithAllowedApiCalls() { assertTrue(et != null); System.out.println("allowedApiCalls et created"); System.out.println(et.getAllowedApiCalls()); - ExternalToolHandler externalToolHandler = new ExternalToolHandler(et, ds, at, null); + URLTokenUtil externalToolHandler = new ExternalToolHandler(et, ds, at, null); System.out.println("allowedApiCalls eth created"); JsonObject jo = externalToolHandler - .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters()))).build(); + .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters())), JsonUtil.getJsonArray(et.getAllowedApiCalls())).build(); assertEquals(1, jo.getJsonObject("queryParameters").getInt("datasetId")); String signedUrl = jo.getJsonArray("signedUrls").getJsonObject(0).getString("signedUrl"); // The date and token will change each time but check for the constant parts of diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 9337949f605..4f5af8b97b0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java
@@ -9,6 +9,8 @@
 import edu.harvard.iq.dataverse.FileMetadata;
 import edu.harvard.iq.dataverse.GlobalId;
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
+import edu.harvard.iq.dataverse.util.URLTokenUtil;
+
 import java.util.ArrayList;
 import java.util.List;
 import jakarta.json.Json;
@@ -49,7 +51,7 @@ public void testfindAll() {
         externalToolTypes.add(externalToolType);
         ExternalTool.Scope scope = ExternalTool.Scope.FILE;
         ExternalTool externalTool = new ExternalTool("displayName", "toolName", "description", externalToolTypes, scope, "http://foo.com", "{}", DataFileServiceBean.MIME_TYPE_TSV_ALT);
-        ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null);
+        URLTokenUtil externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null);
         List externalTools = new ArrayList<>();
         externalTools.add(externalTool);
         List availableExternalTools = externalToolService.findExternalToolsByFile(externalTools, dataFile);

From f056d6c051bf784ca4808e8757efa9afcaf7778c Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Thu, 19 Oct 2023 15:10:14 -0400
Subject: [PATCH 064/414] minor incremental changes (#9635)

---
 .../search/SearchIncludeFragment.java         | 30 +++++++++++++++----
 .../dataverse/search/SearchServiceBean.java   |  6 ++--
 .../dataverse/search/SolrQueryResponse.java   | 10 ++++++-
 src/main/webapp/search-include-fragment.xhtml | 24 +++++++++++++--
 4 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java
index 47a5621c3d6..14274a09399 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java
@@ -131,7 +131,8 @@ public class SearchIncludeFragment implements java.io.Serializable {
     Map datasetfieldFriendlyNamesBySolrField = new HashMap<>();
     Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>();
     private boolean solrIsDown = false;
-    private boolean solrIsOverloaded = false;
+    private boolean solrIsTemporarilyUnavailable = false;
+    private boolean solrFacetsDisabled = false;
     private Map numberOfFacets = new HashMap<>();
     // private boolean showUnpublished;
     List filterQueriesDebug = new ArrayList<>();
@@ -361,6 +362,14 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused
             if (solrQueryResponse.hasError()){
                 logger.info(solrQueryResponse.getError());
                 setSolrErrorEncountered(true);
+            }
+            // Solr "temporarily unavailable" is the condition triggered by
+            // receiving a 503 from the search engine, which is in turn a result
+            // of one of the Solr "circuit breakers" being triggered by excessive
+            // load. We treat this condition as distinct from "Solr is down",
+            // on the assumption that it is transient.
+            if (solrQueryResponse.isSolrTemporarilyUnavailable()) {
+                setSolrTemporarilyUnavailable(true);
+            }
             // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A.
             // (why exactly do we need it, again?)
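The two flags above end up driving different user-facing behavior: solrIsDown means no usable response came back at all, while solrIsTemporarilyUnavailable means Solr answered 503 because a circuit breaker tripped and the condition should clear on its own. A small illustrative sketch of that mapping follows; the accessor and bundle-key names are assumptions, not code from the patch:

    // Illustrative only: choose user-facing messaging from the two failure flags.
    String searchStatusMessage() {
        if (isSolrIsDown()) {                 // assumed accessor name
            return BundleUtil.getStringFromBundle("search.solr.down");          // assumed key
        }
        if (isSolrTemporarilyUnavailable()) { // transient 503 from a circuit breaker
            return BundleUtil.getStringFromBundle("search.solr.tryagainlater"); // assumed key
        }
        return null; // the search succeeded
    }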
@@ -386,7 +395,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - if (selectedTypesList.size() < 3) { + if (selectedTypesList.size() < 3 && !isSolrTemporarilyUnavailable()) { // If some types are NOT currently selected, we will need to // run another query to obtain the numbers of the unselected types: @@ -1079,14 +1088,23 @@ public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } - public boolean isSolrOverloaded() { - return solrIsOverloaded; + public boolean isSolrTemporarilyUnavailable() { + return solrIsTemporarilyUnavailable; } - public void setSolrIsOverloaded(boolean solrIsOverloaded) { - this.solrIsOverloaded = solrIsOverloaded; + public void setSolrTemporarilyUnavailable(boolean solrIsTemporarilyUnavailable) { + this.solrIsTemporarilyUnavailable = solrIsTemporarilyUnavailable; } + public boolean isFacetsDisabled() { + return solrFacetsDisabled; + } + + public void setFacetsDisabled(boolean solrFacetsDisabled) { + this.solrFacetsDisabled = solrFacetsDisabled; + } + + public boolean isRootDv() { return rootDv; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 1b92c2a4a46..6e410488794 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -382,7 +382,6 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; - boolean solrTemporarilyUnavailable = false; try { queryResponse = solrClientService.getSolrClient().query(solrQuery); @@ -397,6 +396,8 @@ public SolrQueryResponse search( logger.info("return code: "+queryResponse.getStatus()); } + SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); + // We probably shouldn't be assuming that this is necessarily a // "search syntax error", as the code below implies - could be // something else too - ? 
@@ -407,9 +408,9 @@ public SolrQueryResponse search( // a transient condition): if (ex.code() == 503) { - solrTemporarilyUnavailable = true; // actual logic for communicating this state back to the local // client code TBD (@todo) + exceptionSolrQueryResponse.setSolrTemporarilyUnavailable(true); } String error = "Search Syntax Error: "; @@ -421,7 +422,6 @@ public SolrQueryResponse search( error += messageFromSolr; } logger.info(error); - SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java index 893099ff08d..27e79cb1fc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java @@ -26,6 +26,7 @@ public class SolrQueryResponse { private String error; private Map dvObjectCounts = new HashMap<>(); private Map publicationStatusCounts = new HashMap<>(); + private boolean solrTemporarilyUnavailable = false; public static String DATAVERSES_COUNT_KEY = "dataverses_count"; public static String DATASETS_COUNT_KEY = "datasets_count"; @@ -91,7 +92,14 @@ public JsonObjectBuilder getPublicationStatusCountsAsJSON(){ } return this.getMapCountsAsJSON(publicationStatusCounts); } - + + public boolean isSolrTemporarilyUnavailable() { + return solrTemporarilyUnavailable; + } + + public void setSolrTemporarilyUnavailable(boolean solrTemporarilyUnavailable) { + this.solrTemporarilyUnavailable = solrTemporarilyUnavailable; + } public JsonObjectBuilder getDvObjectCountsAsJSON(){ diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 718df813348..8397a14136e 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -88,12 +88,24 @@
#{msg.rendered()} From 00a17071c358b7ebee09e77130cb7319c665dfb5 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 10 Nov 2023 13:38:36 -0500 Subject: [PATCH 134/414] Revert "allow longer custom questions" This reverts commit ba4d178f5c541ec88ea0879ec5c715bda529f2c9. --- src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java index d880da5b4a8..2cb6f27c3e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java +++ b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java @@ -2,7 +2,7 @@ import java.io.Serializable; import java.util.List; import jakarta.persistence.*; -import jakarta.validation.constraints.NotBlank; +import org.hibernate.validator.constraints.NotBlank; /** * @@ -41,7 +41,7 @@ public void setId(Long id) { private String questionType; @NotBlank(message = "{custom.questiontext}") - @Column( nullable = false, columnDefinition = "TEXT") + @Column( nullable = false ) private String questionString; private boolean required; From d3fbee58262ac439a0b10f4ca7e1494dea4a6c5d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 10 Nov 2023 13:38:43 -0500 Subject: [PATCH 135/414] Revert "add return null if commandexception" This reverts commit aa7eceeb762eca045127cf91acb35d6c62b00d79. --- src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java index 8b09291d052..9fb584a9133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java @@ -320,7 +320,7 @@ public String save() { logger.info("Guestbook Page Command Exception. Dataverse: " + dataverse.getName()); logger.info(ex.toString()); FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("guestbook.save.fail"), " - " + ex.toString())); - return null; + //logger.severe(ex.getMessage()); } editMode = null; String msg = (create)? BundleUtil.getStringFromBundle("guestbook.create"): BundleUtil.getStringFromBundle("guestbook.save"); From 4b347c7ec13591ba38ffa55fbde394cce2b8bcfe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:47:17 -0500 Subject: [PATCH 136/414] doc update --- .../source/developers/big-data-support.rst | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 1917967b3f3..d38f7f27a68 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -149,20 +149,30 @@ Globus File Transfer Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. -Users can transfer files via `Globus `_ into and out of datasets when their Dataverse installation is configured to use a Globus accessible S3 store and a community-developed `dataverse-globus `_ "transfer" app has been properly installed and configured. 
+Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use one or more Globus-accessible stores
+and a community-developed `dataverse-globus `_ app has been properly installed and configured.

Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store.

-As Globus aficionados know, Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa).
+Globus endpoints can be in a variety of places, from data centers to personal computers.
+This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa).

-Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files:
+Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files:

* robust file transfer capable of restarting after network or endpoint failures
* third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation

-Globus transfer requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (at no cost).
+Dataverse supports three options for using Globus: two involving transfer to Dataverse-managed endpoints and one allowing Dataverse to reference files on remote endpoints.
+Dataverse-managed endpoints must be Globus 'guest collections' hosted on either a file-system-based endpoint or an S3-based endpoint (the latter requires use of the Globus
+S3 connector, which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint.
+Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost).

-The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein.
+For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials.
+Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e.
it must be publicly readable, or some out-of-band mechanism for requesting access must exist, which could be described in the dataset's Terms of Use and Access).
+
+All of Dataverse's Globus capabilities are now store-based (see the store documentation) and therefore different collections/datasets can be configured to use different Globus-capable stores (or regular file, S3, or other stores).
+
+More details of the setup required to enable Globus are described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein.

As described in that document, Globus transfers can be initiated by choosing the Globus option in the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, "Globus Transfer" is one of the download options in the "Access Dataset" menu and optionally the file landing page download menu (if/when supported in the dataverse-globus app).

From 6ad55eb689071921857a9f97135e97dd2e71c076 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Fri, 10 Nov 2023 17:50:16 -0500
Subject: [PATCH 137/414] Support multiple ref endpoints for non-managed case

---
 .../harvard/iq/dataverse/api/Datasets.java    | 72 ++++----
 .../dataaccess/GlobusAccessibleStore.java     | 14 +-
 .../dataaccess/GlobusOverlayAccessIO.java     | 166 +++++++++++++-----
 .../dataaccess/RemoteOverlayAccessIO.java     | 47 +++--
 .../dataverse/globus/GlobusServiceBean.java   | 31 +++-
 5 files changed, 226 insertions(+), 104 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index b1c528f3fd9..a57f373f106 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -3601,13 +3601,11 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat
         }

         JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder();
-        if (managed) {
-
-            allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusTransferPaths")
+        String requestCallName = managed ?
"requestGlobusTransferPaths" : "requestGlobusReferencePaths"; + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, requestCallName) .add(URLTokenUtil.HTTP_METHOD, "POST") - .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusTransferPaths") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusPaths") .add(URLTokenUtil.TIMEOUT, 300)); - } allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles") .add(URLTokenUtil.HTTP_METHOD, "POST") .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles") @@ -3632,7 +3630,7 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat */ @POST @AuthRequired - @Path("{id}/requestGlobusTransferPaths") + @Path("{id}/requestGlobusPaths") @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody @@ -3666,35 +3664,45 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP } catch (WrappedResponse wr) { return wr.getResponse(); } - - if(!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { - return badRequest("This dataset does not have managed Globus storage"); - } - if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) .canIssue(UpdateDatasetVersionCommand.class)) { - try { + JsonObject params = JsonUtil.getJsonObject(jsonBody); - String principal = params.getString("principal"); - int numberOfPaths = params.getInt("numberOfFiles"); - if(numberOfPaths <=0) { - return badRequest("numberOfFiles must be positive"); - } - - JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); - switch (response.getInt("status")) { - case 201: - return ok(response.getJsonObject("paths")); - case 400: - return badRequest("Unable to grant permission"); - case 409: - return conflict("Permission already exists"); - default: - return error(null, "Unexpected error when granting permission"); - } - } catch (NullPointerException|ClassCastException e) { - return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); - + if (!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + try { + JsonArray referencedFiles = params.getJsonArray("referencedFiles"); + if (referencedFiles == null || referencedFiles.size() == 0) { + return badRequest("No referencedFiles specified"); + } + JsonObject fileMap = globusService.requestReferenceFileIdentifiers(dataset, referencedFiles); + return (ok(fileMap)); + } catch (Exception e) { + return badRequest(e.getLocalizedMessage()); + } + } else { + try { + String principal = params.getString("principal"); + int numberOfPaths = params.getInt("numberOfFiles"); + if (numberOfPaths <= 0) { + return badRequest("numberOfFiles must be positive"); + } + + JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); + switch (response.getInt("status")) { + case 201: + return ok(response.getJsonObject("paths")); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + + } catch (NullPointerException | ClassCastException e) { + return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); + + } } } else { return 
forbidden("User doesn't have permission to upload to this dataset"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index 1d98044b2b5..afc7556481a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -7,8 +7,7 @@ public interface GlobusAccessibleStore { static final String MANAGED = "managed"; - static final String GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH = "globus-transfer-endpoint-with-basepath"; - static final String GLOBUS_REFERENCE_ENDPOINTS_WITH_BASEPATHS = "globus-reference-endpoints-with-basepaths"; + static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; static final String GLOBUS_TOKEN = "globus-token"; public static boolean isDataverseManaged(String driverId) { @@ -16,37 +15,36 @@ public static boolean isDataverseManaged(String driverId) { } public static String getTransferEndpointId(String driverId) { - String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH); + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); int pathStart = endpointWithBasePath.indexOf("/"); return pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; } public static String getTransferPath(String driverId) { - String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH); + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); int pathStart = endpointWithBasePath.indexOf("/"); return pathStart > 0 ? endpointWithBasePath.substring(pathStart) : ""; } public static JsonArray getReferenceEndpointsWithPaths(String driverId) { - String[] endpoints = StorageIO.getConfigParamForDriver(driverId, GLOBUS_REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, RemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); JsonArrayBuilder builder = Json.createArrayBuilder(); for(int i=0;i/// * - * baseUrl: globus:// + * transfer and reference endpoint formats: + * reference endpoints separated by a comma * */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO implements GlobusAccessibleStore { @@ -50,7 +53,7 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce * Dataverse/the globus app manage file locations, access controls, deletion, * etc. 
*/ - private boolean dataverseManaged = false; + private Boolean dataverseManaged = null; private String relativeDirectoryPath; @@ -58,22 +61,59 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce private String filename; + private String[] allowedEndpoints; private String endpoint; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); } + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + this.driverId = driverId; + configureStores(null, driverId, storageLocation); + if (isManaged()) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Referenced path: " + path); + } + } + private boolean isManaged() { + if(dataverseManaged==null) { + dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); + } + return dataverseManaged; + } + + private String retrieveGlobusAccessToken() { + String globusToken = getConfigParam(GlobusAccessibleStore.GLOBUS_TOKEN); + + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); + } + + private void parsePath() { int filenameStart = path.lastIndexOf("/") + 1; - String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf(DataAccess.SEPARATOR) + 3); + String endpointWithBasePath = null; + if (!isManaged()) { + endpointWithBasePath = findMatchingEndpoint(path, allowedEndpoints); + } else { + endpointWithBasePath = allowedEndpoints[0]; + } + //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); endpointPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); logger.info("endpointPath: " + endpointPath); + - if (dataverseManaged && (dvObject!=null)) { + if (isManaged() && (dvObject!=null)) { Dataset ds = null; if (dvObject instanceof Dataset) { @@ -95,40 +135,36 @@ private void parsePath() { } - public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { - this.driverId = driverId; - configureStores(null, driverId, storageLocation); - this.dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); - if (dataverseManaged) { - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); - path = parts[1]; - } else { - this.setIsLocalFile(false); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(path); - logger.fine("Relative path: " + path); + private static String findMatchingEndpoint(String path, String[] allowedEndpoints) { + for(int i=0;i 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index aafab038ae2..5463254140d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,7 +65,10 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + // A single baseUrl of the form http(s):// where this store can reference data static final String BASE_URL = "base-url"; + // Multiple endpoints where data can be referenced from. Multiple endpoints are separated by a comma. Multiple endpoints are only supported by the GlobalOverlayAccessIO at present. + static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; static final String BASE_STORE = "base-store"; static final String SECRET_KEY = "secret-key"; static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; @@ -74,7 +77,7 @@ public class RemoteOverlayAccessIO extends StorageIO { protected StorageIO baseStore = null; protected String path = null; - protected String baseUrl = null; + private String baseUrl = null; protected static HttpClientContext localContext = HttpClientContext.create(); protected PoolingHttpClientConnectionManager cm = null; @@ -110,7 +113,7 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE logger.fine("Relative path: " + path); } - private void validatePath(String relPath) throws IOException { + protected void validatePath(String relPath) throws IOException { try { URI absoluteURI = new URI(baseUrl + "/" + relPath); if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { @@ -457,19 +460,8 @@ int getUrlExpirationMinutes() { } protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = getConfigParam(BASE_URL); - if (baseUrl == null) { - throw new IOException("dataverse.files." 
+ this.driverId + ".base-url is required"); - } else { - try { - new URI(baseUrl); - } catch (Exception e) { - logger.warning( - "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); - throw new IOException("Can't interpret base-url as a URI"); - } - - } + configureEndpoints(); + if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); @@ -543,6 +535,31 @@ protected void configureStores(DataAccessRequest req, String driverId, String st } } + /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but + * the derived GlobusOverlayAccessIO can support multiple endpoints. + * @throws IOException + */ + protected void configureEndpoints() throws IOException { + baseUrl = getConfigParam(BASE_URL); + if (baseUrl == null) { + //Will accept the first endpoint using the newer setting + baseUrl = getConfigParam(REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*")[0]; + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } + } + if (baseUrl != null) { + try { + new URI(baseUrl); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } + } + // Convenience method to assemble the path, starting with the DOI // authority/identifier/, that is needed to create a base store via // DataAccess.getDirectStorageIO - the caller has to add the store type specific diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index dab0e36852c..3dee3bd498f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -6,7 +6,6 @@ import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; import edu.harvard.iq.dataverse.*; - import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; @@ -20,6 +19,8 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import jakarta.json.JsonString; +import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.HttpMethod; @@ -57,7 +58,6 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; -import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -284,6 +284,33 @@ public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int return response.build(); } + public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { + String driverId = dataset.getEffectiveStorageDriverId(); + JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); + + JsonObjectBuilder fileMap = Json.createObjectBuilder(); + referencedFiles.forEach(value -> { + if (value.getValueType() != ValueType.STRING) { + throw new JsonParsingException("ReferencedFiles must be strings", 
null); + } + String referencedFile = ((JsonString) value).getString(); + boolean valid = false; + for (int i = 0; i < endpoints.size(); i++) { + if (referencedFile.startsWith(((JsonString) endpoints.get(i)).getString())) { + valid = true; + } + } + if (!valid) { + throw new IllegalArgumentException( + "Referenced file " + referencedFile + " is not in an allowed endpoint/path"); + } + String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId); + fileMap.add(referencedFile, + storageIdentifier + "//" + referencedFile); + }); + return fileMap.build(); + } + //Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) From 48f02dde7f22b21e28c8d635df904b79532f042a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:56:56 -0500 Subject: [PATCH 138/414] handle file not found case --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 0dec7133fb5..f42f5443108 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -25,6 +25,7 @@ import org.apache.http.util.EntityUtils; import jakarta.json.Json; +import jakarta.json.JsonArray; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; @@ -189,7 +190,11 @@ public long retrieveSizeFromMedia() { String responseString = EntityUtils.toString(response.getEntity()); logger.info("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); - return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); + JsonArray dataArray = responseJson.getJsonArray("DATA"); + if (dataArray != null && dataArray.size() != 0) { + //File found + return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); + } } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); From c33f07aad938f4707e6985ddeeec801969e4a3fc Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Sat, 11 Nov 2023 14:38:00 -0500 Subject: [PATCH 139/414] Add logic to leave settings as found before test --- .../edu/harvard/iq/dataverse/api/ProvIT.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 3bfa3d72fbd..6b9b59f431d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -30,7 +30,12 @@ public static void setUpClass() { @Test public void testFreeformDraftActions() { - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + boolean provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } + Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); createDepositor.then().assertThat() @@ -85,15 +90,20 @@ public void 
testFreeformDraftActions() { datasetVersions.prettyPrint(); datasetVersions.then().assertThat() .body("data[0].versionState", equalTo("DRAFT")); - - UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - + if(!provEnabled){ + UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } + } @Test public void testAddProvFile() { - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + boolean provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); @@ -213,12 +223,8 @@ public void testAddProvFile() { deleteProvJson.then().assertThat() .statusCode(FORBIDDEN.getStatusCode()); //cannot delete json of a published dataset - UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); -// Command removed, redundant -// Response deleteProvFreeForm = UtilIT.deleteProvFreeForm(dataFileId.toString(), apiTokenForDepositor); -// deleteProvFreeForm.prettyPrint(); -// deleteProvFreeForm.then().assertThat() -// .statusCode(OK.getStatusCode()); - + if(!provEnabled){ + UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } } } From 3407fb9f813984c857ef7708af7d6dc239b8f8ee Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 07:04:15 -0500 Subject: [PATCH 140/414] Add ProvIT to integration-tests.txt --- tests/integration-tests.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 18911b3164a..bb3bc7f9ce6 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,ProvIT From 2842cdaf246c531b04449ac4c8b20fc4a09c2668 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 13 Nov 2023 08:42:31 -0500 Subject: [PATCH 141/414] Move this change into BeforeAll/AfterAll --- .../edu/harvard/iq/dataverse/api/ProvIT.java | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 6b9b59f431d..69a87869fe1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -11,6 +11,9 @@ import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static 
org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.AfterAll; + import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; @@ -20,22 +23,24 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; public class ProvIT { + + private static boolean provEnabled = false; @BeforeAll - public static void setUpClass() { + public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + + provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } } @Test public void testFreeformDraftActions() { - Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - boolean provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } - Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); createDepositor.then().assertThat() @@ -90,20 +95,11 @@ public void testFreeformDraftActions() { datasetVersions.prettyPrint(); datasetVersions.then().assertThat() .body("data[0].versionState", equalTo("DRAFT")); - if(!provEnabled){ - UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } - + } @Test - public void testAddProvFile() { - - Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - boolean provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } + public void testAddProvFile() { Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); @@ -223,6 +219,11 @@ public void testAddProvFile() { deleteProvJson.then().assertThat() .statusCode(FORBIDDEN.getStatusCode()); //cannot delete json of a published dataset + + } + + @AfterAll + public static void tearDownClass() { if(!provEnabled){ UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); } From 437e7ccd480dbae405238faffb9fff8a8317218d Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 13 Nov 2023 09:56:16 -0500 Subject: [PATCH 142/414] #9464 remove unused import --- src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index fabb33e328a..557b7df202b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -128,7 +128,6 @@ import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; From d029cacc9aae5e361869b73f7e76661c5ab8d549 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 11:35:28 -0500 Subject: [PATCH 143/414] remove extra whitespace #10112 --- src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 69a87869fe1..a944c6aa926 
100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -27,12 +27,12 @@ public class ProvIT { private static boolean provEnabled = false; @BeforeAll - public static void setUpClass() { + public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - + provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ + if (!provEnabled) { UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); } } @@ -99,7 +99,7 @@ public void testFreeformDraftActions() { } @Test - public void testAddProvFile() { + public void testAddProvFile() { Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); From c09034d638147c5cd618e5ff4a460e1840b8cd0a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 11:37:16 -0500 Subject: [PATCH 144/414] organize imports #10112 --- .../java/edu/harvard/iq/dataverse/api/ProvIT.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index a944c6aa926..33323ff4239 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -1,27 +1,23 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.AfterAll; - +import static jakarta.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; - +import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - public class ProvIT { private static boolean provEnabled = false; From 75789e0f94d36fce1270b0714bd5e516f356d8ee Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 13 Nov 2023 19:06:26 -0500 Subject: [PATCH 145/414] current state of the flyway script (work in progress/likely to change) #8549 --- .../V6.0.0.3__8549-collection-quotas.sql | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql diff --git a/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql new file mode 100644 index 00000000000..f74d9bebe30 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql @@ -0,0 +1,70 @@ +-- Storage size column added: +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS storagesize BIGINT; + +-- (work in progress! 
the table structure may change/the column may be moved out into
+-- its own table, but the mechanics of the recursion are working)
+
+-- The somewhat convoluted queries below populate the storage sizes for the entire
+-- DvObject tree, fast. It IS possible to do it all with one recursive PostgreSQL
+-- query that will crawl the tree from the leaves (DataFiles) up and add up the
+-- sizes for all the Datasets/Collections above. Unfortunately, that takes some hours
+-- on a database the size of the one at IQSS. So what we are doing instead is to compute
+-- the total sizes of all the *directly* linked objects, with 3 linear queries. This
+-- will correctly calculate the sizes of all the Datasets (since they can only
+-- contain DataFiles, directly, without any extra hierarchy possible) and those
+-- Collections that only contain Datasets; but not the sizes of Collections that
+-- have sub-collections. To take any sub-collections into account we then run
+-- a recursive query - but we only need to run it on the tree of Collections,
+-- which should make it manageably fast on any real-life instance.
+
+UPDATE dvobject SET storagesize=0;
+-- For datafiles, the storage size = main file size by default:
+-- (we are excluding any harvested files)
+UPDATE dvobject SET storagesize=COALESCE(f.filesize,0) FROM datafile f, dataset d WHERE f.id = dvobject.id AND dvobject.owner_id = d.id AND d.harvestingclient_id IS null;
+-- ... but for ingested tabular files the size of the saved original needs to be added, since
+-- those also take space:
+-- (should be safe to assume that there are no *harvested ingested* files)
+UPDATE dvobject SET storagesize=dvobject.storagesize + COALESCE(datatable.originalFileSize,0) FROM datatable WHERE datatable.datafile_id = dvobject.id;
+-- Now we can calculate storage sizes of each individual dataset (a simple sum
+-- of the storage sizes of all the files in the dataset):
+-- (excluding the harvested datasets; this is less important, since there should be
+-- significantly fewer datasets than files, but might as well)
+UPDATE dvobject SET storagesize=o.combinedStorageSize
+FROM (SELECT datasetobject.id, SUM(fileobject.storagesize) AS combinedStorageSize
+FROM dvobject fileobject, dvobject datasetobject
+WHERE fileobject.owner_id = datasetobject.id
+GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null;
+-- ...
and then we can repeat the same for collections, by setting the storage size
+-- to the sum of the storage sizes of the datasets *directly* in each collection:
+-- (no attempt is made yet to recursively count the sizes of all the child sub-collections)
+UPDATE dvobject SET storagesize=o.combinedStorageSize
+FROM (SELECT collectionobject.id, SUM(datasetobject.storagesize) AS combinedStorageSize
+FROM dvobject datasetobject, dvobject collectionobject
+WHERE datasetobject.owner_id = collectionobject.id
+AND datasetobject.storagesize IS NOT null
+GROUP BY collectionobject.id) o WHERE o.id = dvobject.id AND dvobject.dtype='Dataverse';
+
+-- And now we will update the storage sizes of all the Collection ("Dataverse") objects
+-- that contain sub-collections, *recursively*, to add their sizes to the totals:
+WITH RECURSIVE treestorage (id, owner_id, storagesize, dtype) AS
+(
+    -- All dataverses:
+    SELECT id, owner_id, storagesize, dtype
+    FROM dvobject
+    WHERE dtype = 'Dataverse'
+
+    UNION
+
+    -- Recursive Member:
+    SELECT dvobject.id, treestorage.owner_id, dvobject.storagesize, treestorage.dtype
+    FROM treestorage, dvobject
+    WHERE treestorage.id = dvobject.owner_id
+    AND dvobject.dtype = 'Dataverse'
+)
+
+UPDATE dvobject SET storagesize=storagesize+(SELECT COALESCE(SUM(storagesize),0)
+FROM treestorage WHERE owner_id=dvobject.id)
+--FROM treestorage ts
+--WHERE ts.owner_id=dvobject.id
+WHERE dvobject.dtype = 'Dataverse'
+AND dvobject.id IN (SELECT owner_id FROM treestorage WHERE owner_id IS NOT null);

From c49036bf3d67d22cec384a8fe4f7cb23ed3d9a46 Mon Sep 17 00:00:00 2001
From: GPortas
Date: Wed, 15 Nov 2023 12:06:43 +0000
Subject: [PATCH 146/414] Added: includeDeaccessioned support to
 getDatasetVersionCitation API endpoint

---
 .../harvard/iq/dataverse/api/Datasets.java    |  9 ++++++--
 .../harvard/iq/dataverse/api/DatasetsIT.java  | 21 ++++++++++++++++++-
 .../edu/harvard/iq/dataverse/api/UtilIT.java  |  3 ++-
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 292aba0cee3..68c618b0f1f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -3995,9 +3995,14 @@ public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken"
     @GET
     @AuthRequired
     @Path("{id}/versions/{versionId}/citation")
-    public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
+    public Response getDatasetVersionCitation(@Context ContainerRequestContext crc,
+                                              @PathParam("id") String datasetId,
+                                              @PathParam("versionId") String versionId,
+                                              @QueryParam("includeDeaccessioned") boolean includeDeaccessioned,
+                                              @Context UriInfo uriInfo,
+                                              @Context HttpHeaders headers) {
         return response(req -> ok(
-                getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc));
+                getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned).getCitation(true, false)), getRequestUser(crc));
     }
 
     @POST
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
index 56bf53c1c99..d20f1e8a58b 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
+++ 
b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3371,13 +3371,32 @@ public void getDatasetVersionCitation() { createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); - Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, apiToken); + Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, false, apiToken); getDatasetVersionCitationResponse.prettyPrint(); getDatasetVersionCitationResponse.then().assertThat() .statusCode(OK.getStatusCode()) // We check that the returned message contains information expected for the citation string .body("data.message", containsString("DRAFT VERSION")); + + // Test Deaccessioned + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // includeDeaccessioned false + Response getDatasetVersionCitationNotDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, false, apiToken); + getDatasetVersionCitationNotDeaccessioned.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // includeDeaccessioned true + Response getDatasetVersionCitationDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, true, apiToken); + getDatasetVersionCitationDeaccessioned.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", containsString("DEACCESSIONED VERSION")); } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e3a7fd0cfc3..2336bf8beb8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3345,10 +3345,11 @@ static Response getPrivateUrlDatasetVersionCitation(String privateUrlToken) { return response; } - static Response getDatasetVersionCitation(Integer datasetId, String version, String apiToken) { + static Response getDatasetVersionCitation(Integer datasetId, String version, boolean includeDeaccessioned, String apiToken) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) .contentType("application/json") + .queryParam("includeDeaccessioned", includeDeaccessioned) .get("/api/datasets/" + datasetId + "/versions/" + version + "/citation"); return response; } From 75ff2fbad275a4543525ac0dc62f65d3eaa0e5c1 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:10:14 +0000 Subject: [PATCH 147/414] Added: API docs for #10104 --- doc/sphinx-guides/source/api/native-api.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1992390410c..2e3a0b2af08 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2502,6 +2502,16 @@ Get Citation curl -H "Accept:application/json" 
"$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER" +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. + +If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER&includeDeaccessioned=true" + Get Citation by Private URL Token ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From be631af6e5fd5dd181aebdb0ee8a2dd1da3ff789 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:12:31 +0000 Subject: [PATCH 148/414] Added: release notes for #10104 --- doc/release-notes/10104-dataset-citation-deaccessioned.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10104-dataset-citation-deaccessioned.md diff --git a/doc/release-notes/10104-dataset-citation-deaccessioned.md b/doc/release-notes/10104-dataset-citation-deaccessioned.md new file mode 100644 index 00000000000..0ba06d729c4 --- /dev/null +++ b/doc/release-notes/10104-dataset-citation-deaccessioned.md @@ -0,0 +1 @@ +The getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. From 2fb81f6b5e1a5c735b937600b0dd74ee47d236a1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 15 Nov 2023 10:01:52 -0500 Subject: [PATCH 149/414] altering circuit breakers for qa --- conf/solr/9.3.0/solrconfig.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/solr/9.3.0/solrconfig.xml b/conf/solr/9.3.0/solrconfig.xml index b89315cdaa9..9705faa7009 100644 --- a/conf/solr/9.3.0/solrconfig.xml +++ b/conf/solr/9.3.0/solrconfig.xml @@ -588,10 +588,10 @@ check for "Circuit Breakers tripped" in logs and the corresponding error message should tell you what transpired (if the failure was caused by tripped circuit breakers). --> - + 5 + - + 5 + - + + - + + From a376b4e3f4bacc8dc651b7048d9a323535dc92f7 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 10:01:33 -0500 Subject: [PATCH 151/414] Add condition for 401 when a invalid key is provided and create changelog on API Guide --- doc/sphinx-guides/source/api/changelog.rst | 13 +++++++++++++ doc/sphinx-guides/source/api/index.rst | 1 + .../java/edu/harvard/iq/dataverse/api/AccessIT.java | 11 ++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 doc/sphinx-guides/source/api/changelog.rst diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst new file mode 100644 index 00000000000..b78d268db33 --- /dev/null +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -0,0 +1,13 @@ +API Changelog +============= + +.. contents:: |toctitle| + :local: + :depth: 1 + +6.0.0 +----- + +Changes +~~~~~~~ + - **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. 
diff --git a/doc/sphinx-guides/source/api/index.rst b/doc/sphinx-guides/source/api/index.rst index c9e79098546..dd195aa9d62 100755 --- a/doc/sphinx-guides/source/api/index.rst +++ b/doc/sphinx-guides/source/api/index.rst @@ -24,3 +24,4 @@ API Guide linkeddatanotification apps faq + changelog \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index 42e21e53101..d08f916243f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -198,6 +198,8 @@ public void testDownloadSingleFile() { //Not logged in non-restricted Response anonDownloadOriginal = UtilIT.downloadFileOriginal(tabFile1Id); Response anonDownloadConverted = UtilIT.downloadFile(tabFile1Id); + Response anonDownloadConvertedNullKey = UtilIT.downloadFile(tabFile1Id, null); + // ... and download the same tabular data file, but without the variable name header added: Response anonDownloadTabularNoHeader = UtilIT.downloadTabularFileNoVarHeader(tabFile1Id); // ... and download the same tabular file, this time requesting the "format=tab" explicitly: @@ -206,6 +208,8 @@ public void testDownloadSingleFile() { assertEquals(OK.getStatusCode(), anonDownloadConverted.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularNoHeader.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularWithFormatName.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), anonDownloadConvertedNullKey.getStatusCode()); + int origSizeAnon = anonDownloadOriginal.getBody().asByteArray().length; int convertSizeAnon = anonDownloadConverted.getBody().asByteArray().length; int tabularSizeNoVarHeader = anonDownloadTabularNoHeader.getBody().asByteArray().length; @@ -423,10 +427,7 @@ private HashMap readZipResponse(InputStream iStrea } String name = entry.getName(); -// String s = String.format("Entry: %s len %d added %TD", -// entry.getName(), entry.getSize(), -// new Date(entry.getTime())); -// System.out.println(s); + // Once we get the entry from the zStream, the zStream is // positioned read to read the raw data, and we keep @@ -466,7 +467,7 @@ private HashMap readZipResponse(InputStream iStrea @Test public void testRequestAccess() throws InterruptedException { - + String pathToJsonFile = "scripts/api/data/dataset-create-new.json"; Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); createDatasetResponse.prettyPrint(); From 63725d75c115352ff9d0bb94f2e5b6b4d7ca5d05 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 11:07:17 -0500 Subject: [PATCH 152/414] remove cruft: mdc logs #9115 --- mdc-logs/raw-mdc-2019-01-07.log | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 mdc-logs/raw-mdc-2019-01-07.log diff --git a/mdc-logs/raw-mdc-2019-01-07.log b/mdc-logs/raw-mdc-2019-01-07.log deleted file mode 100644 index d7a6386160e..00000000000 --- a/mdc-logs/raw-mdc-2019-01-07.log +++ /dev/null @@ -1,6 +0,0 @@ -#Fields: event_time client_ip session_cookie_id user_cookie_id user_id request_url identifier filename size user-agent title publisher publisher_id authors publication_date version other_id target_url publication_year -2019-01-07T15:14:51-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like 
Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:15:15-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:04-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:14-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bae7c-2c5bbc1a9c8c 1 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:19-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bb8ce-337d8df49763 4026 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 From 2433114ec7b8430753bc730056a07e24ac0bb5d3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 11:20:03 -0500 Subject: [PATCH 153/414] fix bullet #10060 #10070 --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index b78d268db33..a1cffd84f33 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ - - **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. 
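As an illustration of the behavior this changelog entry documents, a minimal REST Assured sketch follows. It is a hypothetical standalone example, not code from these patches: the base URI, file id, and token value are assumptions, and the expectation mirrors the AccessIT assertion added in PATCH 151 (a null or invalid API token on a public, non-restricted datafile now yields 401 instead of 200).

    import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED;
    import static org.junit.jupiter.api.Assertions.assertEquals;

    import io.restassured.RestAssured;
    import io.restassured.response.Response;
    import org.junit.jupiter.api.Test;

    public class PublicDatafileAuthExample {

        @Test
        public void invalidTokenOnPublicFileReturns401() {
            RestAssured.baseURI = "http://localhost:8080"; // assumed local Dataverse instance
            long fileId = 42L; // hypothetical id of a published, non-restricted datafile
            // Passing an invalid key (or none at all) is now rejected up front:
            Response download = RestAssured.given()
                    .queryParam("key", "invalid-token")
                    .get("/api/access/datafile/" + fileId);
            assertEquals(UNAUTHORIZED.getStatusCode(), download.getStatusCode());
        }
    }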
From e0350e735551270f9bd23bfa226b6946282df467 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 11:38:53 -0500 Subject: [PATCH 154/414] Change 6.0.0 to 6.0 --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index a1cffd84f33..086ff4a20e5 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,7 +5,7 @@ API Changelog :local: :depth: 1 -6.0.0 +6.0 ----- Changes From 437e3b94edf89a2245310709c07d8238c0df4235 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 17 Nov 2023 11:42:17 -0500 Subject: [PATCH 155/414] Update doc/sphinx-guides/source/api/changelog.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 086ff4a20e5..2698ba3debf 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **/api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. From 640f69e39f71244b9ba1d7f534180a6b4c8b58cc Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 13:19:14 -0500 Subject: [PATCH 156/414] add release note for API changelog #10060 --- doc/release-notes/10060-api-changelog.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/10060-api-changelog.md diff --git a/doc/release-notes/10060-api-changelog.md b/doc/release-notes/10060-api-changelog.md new file mode 100644 index 00000000000..56ac96e3564 --- /dev/null +++ b/doc/release-notes/10060-api-changelog.md @@ -0,0 +1,3 @@ +We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html + +See also #10060. From 83a66aac65db2f7634b3917d332b0e4253be3c84 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:55:58 -0500 Subject: [PATCH 157/414] Update doc/sphinx-guides/source/api/changelog.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 2698ba3debf..f518a9b542d 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **/api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed to happy (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. 
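Similarly, the includeDeaccessioned support added to the citation endpoint in PATCH 146 can be exercised from client code. The sketch below is hypothetical (dataset id, API token, and base URI are placeholder assumptions); the endpoint path, the :latest-published version identifier, and the includeDeaccessioned query parameter come from PATCH 146 and its documentation, and X-Dataverse-key is the API_TOKEN_HTTP_HEADER used by UtilIT.

    import io.restassured.RestAssured;
    import io.restassured.response.Response;

    public class DeaccessionedCitationExample {

        public static void main(String[] args) {
            RestAssured.baseURI = "http://localhost:8080"; // assumed local Dataverse instance
            int datasetId = 42;           // hypothetical dataset id
            String apiToken = "xxxxxxxx"; // hypothetical API token
            // Without includeDeaccessioned=true, a deaccessioned version would return 404 Not Found:
            Response citation = RestAssured.given()
                    .header("X-Dataverse-key", apiToken)
                    .queryParam("includeDeaccessioned", true)
                    .get("/api/datasets/" + datasetId + "/versions/:latest-published/citation");
            System.out.println(citation.getStatusCode()); // 200, citation marked DEACCESSIONED VERSION
        }
    }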
From 70edaa789e84c99b110036c232155337afb5c459 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 15:02:32 -0500 Subject: [PATCH 158/414] Remove "to happy " --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index f518a9b542d..d6742252d27 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed to happy (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. +- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. From 73593acb1bcdb9ba1d62e47310753e905b2546dd Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 17 Nov 2023 15:17:28 -0500 Subject: [PATCH 159/414] #9464 query by dvo. update IT --- .../dataverse/metrics/MetricsServiceBean.java | 33 ++++++++++--------- .../harvard/iq/dataverse/api/MetricsIT.java | 14 +++++--- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..832dda5ced9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? 
"" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -212,8 +212,9 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + " from datasetversion\n" + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " join dvobject on dataset.id = dvobject.id\n" + " where versionstate='RELEASED'\n" + - " and dataset.harvestingclient_id is null\n" + + " and dvobject.harvestingclient_id is null\n" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + " group by dataset_id\n" + "))\n"; @@ -225,7 +226,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + - " AND dataset.harvestingclient_id IS NOT null \n" + + " AND dvobject.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -244,7 +245,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? 
"" : "JOIN dvobject ON dvobject.id = dataset.id\n") + + "JOIN dvobject ON dvobject.id = dataset.id\n" + "WHERE\n" + originClause + "AND datasetfieldtype.name = 'subject'\n" @@ -258,11 +259,11 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio } public long datasetsPastDays(int days, String dataLocation, Dataverse d) { - String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -276,7 +277,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" @@ -304,7 +305,7 @@ public JsonArray filesTimeSeries(Dataverse d) { + "where datasetversion.id=filemetadata.datasetversion_id\n" + "and versionstate='RELEASED' \n" + "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n" - + "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " + + "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " + ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ") + "group by filemetadata.id) as subq group by subq.date order by date;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -327,11 +328,11 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); @@ -350,11 +351,11 @@ public long filesPastDays(int days, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index e3328eefb4a..b961a86dc0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -30,7 +30,7 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -54,7 +54,7 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -77,7 +77,7 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -100,7 +100,7 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -283,6 +283,12 @@ public void testGetDatasetsBySubject() { response = UtilIT.metricsDatasetsBySubject("dataLocation=local"); response.then().assertThat() .statusCode(OK.getStatusCode()); + + //Test ok when passing remote + response = UtilIT.metricsDatasetsBySubject("dataLocation=remote"); + response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test From d0fc9affdf52dfd60461520adb20a6c7d30e7d6b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 Nov 2023 15:31:50 -0500 Subject: [PATCH 160/414] refactor to avoid overloaded methods in constructors --- .../AbstractRemoteOverlayAccessIO.java | 335 ++++++++++++++++++ .../dataaccess/GlobusAccessibleStore.java | 4 +- .../dataaccess/GlobusOverlayAccessIO.java | 51 ++- .../dataaccess/RemoteOverlayAccessIO.java | 315 +--------------- .../dataaccess/RemoteOverlayAccessIOTest.java | 1 - 5 files changed, 390 insertions(+), 316 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java diff 
--git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java new file mode 100644 index 00000000000..8adaf746210 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -0,0 +1,335 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Logger; + +import javax.net.ssl.SSLContext; + +import org.apache.http.Header; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.protocol.HTTP; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; + +public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { + + protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + protected static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + static final String BASE_STORE = "base-store"; + protected static final String SECRET_KEY = "secret-key"; + static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; + protected static final String REMOTE_STORE_NAME = "remote-store-name"; + protected static final String REMOTE_STORE_URL = "remote-store-url"; + protected StorageIO baseStore = null; + protected String path = null; + protected PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; + + public static String getBaseStoreIdFor(String driverId) { + return getConfigParamForDriver(driverId, BASE_STORE); + } + + public AbstractRemoteOverlayAccessIO() { + super(); + } + + public AbstractRemoteOverlayAccessIO(String storageLocation, String driverId) { + super(storageLocation, driverId); + } + + public AbstractRemoteOverlayAccessIO(T 
dvObject, DataAccessRequest req, String driverId) { + super(dvObject, req, driverId); + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. + */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + protected int getUrlExpirationMinutes() { + String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. 
+ connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? httpConcurrency : 20); + } + + @Override + abstract public long retrieveSizeFromMedia(); + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (retrieveSizeFromMedia() != -1); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + + } else if (storageLocation != null) { + // ://// + // remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if (index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? 
storageLocation.substring(0, endOfId) : storageLocation; + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = getConfigParam(REMOTE_STORE_NAME); + try { + remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); + } catch (MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + protected String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (getDvObject() instanceof Dataset) { + fullStoragePath = getDataset().getAuthorityForFileStorage() + "/" + + getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (getDvObject() instanceof DataFile) { + fullStoragePath = getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index afc7556481a..ce75395c883 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -28,7 +28,7 @@ public static String getTransferPath(String driverId) { } public static JsonArray getReferenceEndpointsWithPaths(String driverId) { - String[] endpoints = StorageIO.getConfigParamForDriver(driverId, RemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); JsonArrayBuilder builder = Json.createArrayBuilder(); for(int i=0;i extends RemoteOverlayAccessIO implements GlobusAccessibleStore { +public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); /* @@ -67,11 +68,19 @@ public class 
GlobusOverlayAccessIO extends RemoteOverlayAcce public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); + configureGlobusEndpoints(); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { this.driverId = driverId; + configureGlobusEndpoints(); configureStores(null, driverId, storageLocation); if (isManaged()) { String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); @@ -83,6 +92,7 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE logger.fine("Referenced path: " + path); } } + private boolean isManaged() { if(dataverseManaged==null) { dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); @@ -146,7 +156,6 @@ private static String findMatchingEndpoint(String path, String[] allowedEndpoint return null; } - @Override protected void validatePath(String relPath) throws IOException { if (isManaged()) { if (!usesStandardNamePattern(relPath)) { @@ -363,8 +372,7 @@ public String getStorageLocation() throws IOException { * the derived GlobusOverlayAccessIO can support multiple endpoints. * @throws IOException */ - @Override - protected void configureEndpoints() throws IOException { + protected void configureGlobusEndpoints() throws IOException { allowedEndpoints = getAllowedEndpoints(this.driverId); logger.info("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); } @@ -435,5 +443,40 @@ public static void main(String[] args) { } } + + + @Override + public void open(DataAccessOption... 
option) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public Path getFileSystemPath() throws IOException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public void savePath(Path fileSystemPath) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + // TODO Auto-generated method stub + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 5463254140d..1616bfabf96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -11,45 +11,23 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; -import java.security.KeyManagementException; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; import java.util.List; -import java.util.function.Predicate; -import java.util.logging.Logger; import org.apache.http.Header; -import org.apache.http.client.config.CookieSpecs; -import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpHead; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.config.Registry; -import org.apache.http.config.RegistryBuilder; -import org.apache.http.conn.socket.ConnectionSocketFactory; -import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.protocol.HTTP; -import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; -import javax.net.ssl.SSLContext; - /** * @author qqmyers */ @@ -61,40 +39,20 @@ * * baseUrl: http(s):// */ -public class RemoteOverlayAccessIO<T extends DvObject> extends StorageIO<T> { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); +public class RemoteOverlayAccessIO<T extends DvObject> extends AbstractRemoteOverlayAccessIO<T> { // A single baseUrl of the form http(s):// where this store can reference data static final String BASE_URL = "base-url"; - // Multiple endpoints where data can be referenced from. Multiple endpoints are separated by a comma. Multiple endpoints are only supported by the GlobalOverlayAccessIO at present. 
- static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; - static final String BASE_STORE = "base-store"; - static final String SECRET_KEY = "secret-key"; - static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; - static final String REMOTE_STORE_NAME = "remote-store-name"; - static final String REMOTE_STORE_URL = "remote-store-url"; - - protected StorageIO baseStore = null; - protected String path = null; - private String baseUrl = null; - - protected static HttpClientContext localContext = HttpClientContext.create(); - protected PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - protected int timeout = 1200; - protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - protected static boolean trustCerts = false; - protected int httpConcurrency = 4; + String baseUrl = null; public RemoteOverlayAccessIO() { + super(); } public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); @@ -106,6 +64,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { super(null, null, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(null, driverId, storageLocation); path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); @@ -296,105 +255,12 @@ public void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. 
- */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStorageLocation = fullStorageLocation - .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - @Override public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); } - @Override - public boolean exists() { - logger.fine("Exists called"); - return (retrieveSizeFromMedia() != -1); - } - @Override public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( @@ -407,11 +273,6 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); } - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - @Override public boolean downloadRedirectEnabled() { String optionValue = getConfigParam(StorageIO.DOWNLOAD_REDIRECT); @@ -443,103 +304,12 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - - protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - configureEndpoints(); - - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if (dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); - } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case 
DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); - throw new IOException("Not supported"); - } - - } else if (storageLocation != null) { - // ://// - // remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if (index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) - int endOfId = storageLocation.indexOf("//"); - fullStorageLocation = (endOfId>-1) ? storageLocation.substring(0, endOfId) : storageLocation; - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); - throw new IOException("Not supported"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); - } - } - remoteStoreName = getConfigParam(REMOTE_STORE_NAME); - try { - remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); - } catch (MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but * the derived GlobusOverlayAccessIO can support multiple endpoints. * @throws IOException */ - protected void configureEndpoints() throws IOException { + protected void configureRemoteEndpoints() throws IOException { baseUrl = getConfigParam(BASE_URL); if (baseUrl == null) { //Will accept the first endpoint using the newer setting @@ -560,70 +330,6 @@ protected void configureEndpoints() throws IOException { } } - // Convenience method to assemble the path, starting with the DOI - // authority/identifier/, that is needed to create a base store via - // DataAccess.getDirectStorageIO - the caller has to add the store type specific - // prefix required. 
- protected String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStoragePath = fullStoragePath - .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if (suffixIndex >= 0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); - } - @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( @@ -660,13 +366,4 @@ static boolean isValidIdentifier(String driverId, String storageId) { } return true; } - - public static String getBaseStoreIdFor(String driverId) { - return getConfigParamForDriver(driverId, BASE_STORE); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index 5affc01aff0..1c371881ba6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -8,7 +8,6 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.mocks.MocksFactory; import edu.harvard.iq.dataverse.util.UrlSignerUtil; From 2500bccc5fa438bf2dff4e5aa887e816099a51e3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Nov 2023 14:04:22 -0500 Subject: [PATCH 161/414] assert current /bag-info.txt behavior #8760 Also, add a superuser-only API for downloading files (such as bags) from the file system so we can make assertions about them in our tests. --- .../iq/dataverse/api/AbstractApiBean.java | 7 ++ .../edu/harvard/iq/dataverse/api/Admin.java | 25 ++++- .../edu/harvard/iq/dataverse/api/BagIT.java | 101 +++++++++++++++++- 3 files changed, 128 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 027f9e0fcb1..58565bcc9d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -45,11 +45,13 @@ import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; +import java.io.InputStream; import java.net.URI; import java.util.Arrays; import java.util.Collections; @@ -726,6 +728,11 @@ protected Response ok(String data, MediaType mediaType, String downloadFilename) return res.build(); } + protected Response ok(InputStream inputStream) { + ResponseBuilder res = Response.ok().entity(inputStream).type(MediaType.valueOf(FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT)); + return res.build(); + } + protected Response created( String uri, JsonObjectBuilder bld ) { return Response.created( URI.create(uri) ) .entity( Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..684ed32dff8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -107,6 +107,7 @@ import 
edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; @@ -2425,5 +2426,27 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); } - + + /** + * For testing only. Download a file from the file system. + */ + @GET + @AuthRequired + @Path("/localfile") + public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam("pathToFile") String pathToFile) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + try { + return ok(new FileInputStream(pathToFile)); + } catch (IOException ex) { + return error(Status.BAD_REQUEST, ex.toString()); + } + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index e7210bc45a9..fae9cf95156 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -1,17 +1,32 @@ package edu.harvard.iq.dataverse.api; -import io.restassured.RestAssured; -import io.restassured.response.Response; import edu.harvard.iq.dataverse.engine.command.impl.LocalSubmitToArchiveCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import io.restassured.RestAssured; +import static io.restassured.RestAssured.given; +import io.restassured.response.Response; import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.OK; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.Enumeration; +import java.util.Scanner; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public class BagIT { + static String bagitExportDir = "/tmp"; + @BeforeAll public static void setUpClass() { @@ -25,14 +40,14 @@ public static void setUpClass() { setArchiverSettings.then().assertThat() .statusCode(OK.getStatusCode()); - Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", "/tmp"); + Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", bagitExportDir); setBagItLocalPath.then().assertThat() .statusCode(OK.getStatusCode()); } @Test - public void testBagItExport() { + public void testBagItExport() throws IOException { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); @@ -63,6 +78,78 @@ public void testBagItExport() { archiveDataset.prettyPrint(); archiveDataset.then().assertThat().statusCode(OK.getStatusCode()); + // spaceName comes from LocalSubmitToArchiveCommand + String spaceName = datasetPid.replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); + // spacename: doi-10-5072-fk2-fosg5q + + String pathToZip = bagitExportDir + "/" + spaceName + "v1.0" + ".zip"; + + try { + // give the bag time to generate + Thread.sleep(3000); + } catch (InterruptedException ex) { + } + + // A bag could look like this: + 
//doi-10-5072-FK2-DKUTDUv-1-0/data/ + //doi-10-5072-FK2-DKUTDUv-1-0/data/Darwin's Finches/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/pid-mapping.txt + //doi-10-5072-FK2-DKUTDUv-1-0/manifest-md5.txt + //doi-10-5072-FK2-DKUTDUv-1-0/bagit.txt + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/oai-ore.jsonld + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/datacite.xml + //doi-10-5072-FK2-DKUTDUv-1-0/bag-info.txt + // --- + // bag-info.txt could look like this: + //Contact-Name: Finch, Fiona + //Contact-Email: finch@mailinator.com + //Source-Organization: Dataverse Installation () + //Organization-Address: + //Organization-Email: + //External-Description: Darwin's finches (also known as the Galápagos finches) are a group of about + // fifteen species of passerine birds. + //Bagging-Date: 2023-11-14 + //External-Identifier: https://doi.org/10.5072/FK2/LZIGBC + //Bag-Size: 0 bytes + //Payload-Oxum: 0.0 + //Internal-Sender-Identifier: Root:Darwin's Finches + Response downloadBag = downloadLocalFile(pathToZip, apiToken); + downloadBag.then().assertThat().statusCode(OK.getStatusCode()); + Path outputPath = Paths.get("/tmp/foo.zip"); + java.nio.file.Files.copy(downloadBag.getBody().asInputStream(), outputPath, StandardCopyOption.REPLACE_EXISTING); + + ZipFile zipFile = new ZipFile(outputPath.toString()); + Enumeration<? extends ZipEntry> entries = zipFile.entries(); + String sourceOrg = null; + String orgAddress = null; + String orgEmail = null; + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + String name = entry.getName(); + System.out.println("name: " + name); + if (name.endsWith("bag-info.txt")) { + InputStream stream = zipFile.getInputStream(entry); + Scanner s = new Scanner(stream).useDelimiter("\\A"); + String result = s.hasNext() ? 
s.next() : ""; + System.out.println("result: " + result); + String[] lines = result.split("\n"); + for (String line : lines) { + if (line.startsWith("Source-Organization")) { + sourceOrg = line; + } else if (line.startsWith("Organization-Address")) { + orgAddress = line; + } else if (line.startsWith("Organization-Email")) { + orgEmail = line; + } else { + } + } + } + } + assertEquals("Source-Organization: Dataverse Installation ()", sourceOrg.trim()); + assertEquals("Organization-Address: ", orgAddress.trim()); + assertEquals("Organization-Email: ", orgEmail.trim()); } @AfterAll @@ -75,4 +162,10 @@ public static void tearDownClass() { } + static Response downloadLocalFile(String pathToFile, String apiToken) { + return given() + .header("X-Dataverse-key", apiToken) + .get("/api/admin/localfile?pathToFile=" + pathToFile); + } + } From 7240e870d35fda4ec96a4ee0e0b488a9c4fc3d4f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Nov 2023 16:03:15 -0500 Subject: [PATCH 162/414] configurable BagIt source org name, address, email #8760 These values were used while testing: DATAVERSE_BAGIT_SOURCEORG_NAME=LibraScholar DATAVERSE_BAGIT_SOURCEORG_ADDRESS=123 Wisdom Way\nCambridge, MA\nUSA DATAVERSE_BAGIT_SOURCEORG_EMAIL=hello@dataverse.librascholar.edu --- .../iq/dataverse/settings/JvmSettings.java | 7 +++++++ .../iq/dataverse/util/bagit/BagGenerator.java | 15 ++++++++++----- src/main/java/propertyFiles/Bundle.properties | 4 ---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..2f59350906c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -150,6 +150,13 @@ public enum JvmSettings { SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + // BAGIT SETTINGS + SCOPE_BAGIT(PREFIX, "bagit"), + SCOPE_BAGIT_SOURCEORG(SCOPE_BAGIT, "sourceorg"), + BAGIT_SOURCE_ORG_NAME(SCOPE_BAGIT_SOURCEORG, "name"), + BAGIT_SOURCEORG_ADDRESS(SCOPE_BAGIT_SOURCEORG, "address"), + BAGIT_SOURCEORG_EMAIL(SCOPE_BAGIT_SOURCEORG, "email"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index baba1a0cb43..b7c44014b80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -74,7 +74,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; +import java.util.Optional; public class BagGenerator { @@ -822,17 +824,20 @@ private String generateInfoFile() { logger.warning("No contact info available for BagIt Info file"); } - info.append("Source-Organization: " + BundleUtil.getStringFromBundle("bagit.sourceOrganization")); + String orgName = JvmSettings.BAGIT_SOURCE_ORG_NAME.lookupOptional(String.class).orElse("Dataverse Installation ()"); + String orgAddress = JvmSettings.BAGIT_SOURCEORG_ADDRESS.lookupOptional(String.class).orElse(""); + String orgEmail = JvmSettings.BAGIT_SOURCEORG_EMAIL.lookupOptional(String.class).orElse(""); + + 
info.append("Source-Organization: " + orgName); // ToDo - make configurable info.append(CRLF); - info.append("Organization-Address: " + WordUtils.wrap( - BundleUtil.getStringFromBundle("bagit.sourceOrganizationAddress"), 78, CRLF + " ", true)); + info.append("Organization-Address: " + WordUtils.wrap(orgAddress, 78, CRLF + " ", true)); + info.append(CRLF); // Not a BagIt standard name - info.append( - "Organization-Email: " + BundleUtil.getStringFromBundle("bagit.sourceOrganizationEmail")); + info.append("Organization-Email: " + orgEmail); info.append(CRLF); info.append("External-Description: "); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 79887f7e76c..972e5e35601 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2379,10 +2379,6 @@ api.prov.error.freeformMissingJsonKey=The JSON object you send must have a key c api.prov.error.freeformNoText=No provenance free form text available for this file. api.prov.error.noDataFileFound=Could not find a file based on ID. -bagit.sourceOrganization=Dataverse Installation () -bagit.sourceOrganizationAddress= -bagit.sourceOrganizationEmail= - bagit.checksum.validation.error=Invalid checksum for file "{0}". Manifest checksum={2}, calculated checksum={3}, type={1} bagit.checksum.validation.exception=Error while calculating checksum for file "{0}". Checksum type={1}, error={2} bagit.validation.bag.file.not.found=Invalid BagIt package: "{0}" From b2c62510e71e6436c2905796b9cc6a24a04b35d0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 14:06:06 -0500 Subject: [PATCH 163/414] add docs and release note for bag-info.txt config #8760 --- doc/release-notes/8760-bagit.md | 15 ++++++ .../source/installation/config.rst | 46 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 doc/release-notes/8760-bagit.md diff --git a/doc/release-notes/8760-bagit.md b/doc/release-notes/8760-bagit.md new file mode 100644 index 00000000000..30601857309 --- /dev/null +++ b/doc/release-notes/8760-bagit.md @@ -0,0 +1,15 @@ +For BagIT export, it is now possible to configure the following information in bag-info.txt... + +Source-Organization: Harvard Dataverse +Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA +Organization-Email: support@dataverse.harvard.edu + +... using new JVM/MPCONFIG options: + +- dataverse.bagit.sourceorg.name +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email + +Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. + +For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 13a7367de44..df311fcdaca 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1608,6 +1608,25 @@ The workflow id returned in this call (or available by doing a GET of /api/admin Once these steps are taken, new publication requests will automatically trigger submission of an archival copy to the specified archiver, Chronopolis' DuraCloud component in this example. For Chronopolis, as when using the API, it is currently the admin's responsibility to snap-shot the DuraCloud space and monitor the result. Failure of the workflow, (e.g. 
if DuraCloud is unavailable, the configuration is wrong, or the space for this dataset already exists due to a prior publication action or use of the API), will create a failure message but will not affect publication itself. +.. _bag-info.txt: + +Configuring bag-info.txt +++++++++++++++++++++++++ + +Out of the box, placeholder values like below will be placed in bag-info.txt: + +.. code-block:: text + + Source-Organization: Dataverse Installation () + Organization-Address: + Organization-Email: + +To customize these values for your institution, use the following JVM options: + +- :ref:`dataverse.bagit.sourceorg.name` +- :ref:`dataverse.bagit.sourceorg.address` +- :ref:`dataverse.bagit.sourceorg.email` + Going Live: Launching Your Production Deployment ------------------------------------------------ @@ -2506,6 +2525,33 @@ See also :ref:`guestbook-at-request-api` in the API Guide, and . Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_GUESTBOOK_AT_REQUEST``. +.. _dataverse.bagit.sourceorg.name: + +dataverse.bagit.sourceorg.name +++++++++++++++++++++++++++++++ + +The name for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_NAME``. + +.. _dataverse.bagit.sourceorg.address: + +dataverse.bagit.sourceorg.address ++++++++++++++++++++++++++++++++++ + +The mailing address for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. The example in https://datatracker.ietf.org/doc/html/rfc8493 uses commas as separators: ``1 Main St., Cupertino, California, 11111``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_ADDRESS``. + +.. _dataverse.bagit.sourceorg.email: + +dataverse.bagit.sourceorg.email ++++++++++++++++++++++++++++++++ + +The email for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. + .. _feature-flags: Feature Flags From fa6f850b28e8dea1dd2dff542814e29fd7865153 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 16:07:30 -0500 Subject: [PATCH 164/414] limit to downloading from /tmp, add docs #8760 --- doc/release-notes/8760-download-tmp-file.md | 3 +++ doc/sphinx-guides/source/api/changelog.rst | 7 +++++ doc/sphinx-guides/source/api/native-api.rst | 10 +++++++ .../edu/harvard/iq/dataverse/api/Admin.java | 13 +++++++--- .../edu/harvard/iq/dataverse/api/AdminIT.java | 26 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/BagIT.java | 10 ++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 7 +++++ 7 files changed, 64 insertions(+), 12 deletions(-) create mode 100644 doc/release-notes/8760-download-tmp-file.md diff --git a/doc/release-notes/8760-download-tmp-file.md b/doc/release-notes/8760-download-tmp-file.md new file mode 100644 index 00000000000..7623a91ac9a --- /dev/null +++ b/doc/release-notes/8760-download-tmp-file.md @@ -0,0 +1,3 @@ +A new API has been added for testing purposes that allows files to be downloaded from /tmp. 
+ +See diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index d6742252d27..7d6545999ca 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,6 +5,13 @@ API Changelog :local: :depth: 1 +6.1 +--- + +New +~~~ +- **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. + 6.0 ----- diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1992390410c..5b1e7410a4f 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5349,6 +5349,16 @@ A curl example using an ``ID`` Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. +.. _download-file-from-tmp: + +Download File from /tmp +~~~~~~~~~~~~~~~~~~~~~~~ + +As a superuser:: + + GET /api/admin/downloadTmpFile?fullyQualifiedPathToFile=/tmp/foo.txt + +Note that this API is probably only useful for testing. MyData ------ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 684ed32dff8..4da1962853a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -123,6 +123,7 @@ import jakarta.ws.rs.QueryParam; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.StreamingOutput; +import java.nio.file.Paths; /** * Where the secure, setup API calls live. @@ -2428,12 +2429,12 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } /** - * For testing only. Download a file from the file system. + * For testing only. Download a file from /tmp. */ @GET @AuthRequired - @Path("/localfile") - public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam("pathToFile") String pathToFile) { + @Path("/downloadTmpFile") + public Response downloadTmpFile(@Context ContainerRequestContext crc, @QueryParam("fullyQualifiedPathToFile") String fullyQualifiedPathToFile) { try { AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); if (!user.isSuperuser()) { @@ -2442,8 +2443,12 @@ public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam(" } catch (WrappedResponse wr) { return wr.getResponse(); } + java.nio.file.Path normalizedPath = Paths.get(fullyQualifiedPathToFile).normalize(); + if (!normalizedPath.toString().startsWith("/tmp")) { + return error(Status.BAD_REQUEST, "Path must begin with '/tmp' but after normalization was '" + normalizedPath +"'."); + } try { - return ok(new FileInputStream(pathToFile)); + return ok(new FileInputStream(fullyQualifiedPathToFile)); } catch (IOException ex) { return error(Status.BAD_REQUEST, ex.toString()); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 0c5de662e8a..91ba67b10ff 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -854,6 +854,32 @@ public void testBannerMessages(){ } + /** + * For a successful download from /tmp, see BagIT. Here we are doing error + * checking. 
+ */ + @Test + public void testDownloadTmpFile() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response tryToDownloadAsNonSuperuser = UtilIT.downloadTmpFile("/tmp/foo", apiToken); + tryToDownloadAsNonSuperuser.then().assertThat().statusCode(FORBIDDEN.getStatusCode()); + + Response toggleSuperuser = UtilIT.makeSuperUser(username); + toggleSuperuser.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response tryToDownloadEtcPasswd = UtilIT.downloadTmpFile("/etc/passwd", apiToken); + tryToDownloadEtcPasswd.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("Path must begin with '/tmp' but after normalization was '/etc/passwd'.")); + } + private String createTestNonSuperuserApiToken() { Response createUserResponse = UtilIT.createRandomUser(); createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index fae9cf95156..28f7fa28328 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -115,7 +115,7 @@ public void testBagItExport() throws IOException { //Bag-Size: 0 bytes //Payload-Oxum: 0.0 //Internal-Sender-Identifier: Root:Darwin's Finches - Response downloadBag = downloadLocalFile(pathToZip, apiToken); + Response downloadBag = UtilIT.downloadTmpFile(pathToZip, apiToken); downloadBag.then().assertThat().statusCode(OK.getStatusCode()); Path outputPath = Paths.get("/tmp/foo.zip"); java.nio.file.Files.copy(downloadBag.getBody().asInputStream(), outputPath, StandardCopyOption.REPLACE_EXISTING); @@ -162,10 +162,4 @@ public static void tearDownClass() { } - static Response downloadLocalFile(String pathToFile, String apiToken) { - return given() - .header("X-Dataverse-key", apiToken) - .get("/api/admin/localfile?pathToFile=" + pathToFile); - } - -} +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e3a7fd0cfc3..6abfb10c4f6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3574,4 +3574,11 @@ static Response getDownloadSize(Integer datasetId, return requestSpecification .get("/api/datasets/" + datasetId + "/versions/" + version + "/downloadsize"); } + + static Response downloadTmpFile(String fullyQualifiedPathToFile, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/downloadTmpFile?fullyQualifiedPathToFile=" + fullyQualifiedPathToFile); + } + } From 06f6222ba785fa37890efa4156ec3e7988fe4ff5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 20:29:47 -0500 Subject: [PATCH 165/414] more intermediate changes to the entity classes #8549 --- .../edu/harvard/iq/dataverse/DvObject.java | 28 +++++++++++++++++++ .../iq/dataverse/DvObjectContainer.java | 8 ++++-- .../dataverse/ingest/IngestServiceBean.java | 7 +++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 9e7f3f3fe96..b86fabd0a07 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -2,6 +2,8 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -156,6 +158,9 @@ public String visit(DataFile df) { private boolean identifierRegistered; + @Column(nullable = true) + private Long storageSize; + private transient GlobalId globalId = null; @OneToMany(mappedBy = "dvObject", cascade = CascadeType.ALL, orphanRemoval = true) @@ -177,6 +182,13 @@ public void setAlternativePersistentIndentifiers(Set saveAndAddFilesToDataset(DatasetVersion version, + List newFiles, + DataFile fileToReplace, + boolean tabIngest) { + return saveAndAddFilesToDataset(version, newFiles, fileToReplace, tabIngest, null); + } public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, DataFile fileToReplace, From 8766932b6c086b1775e3faf8e19f411d83f87c07 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:09:12 -0500 Subject: [PATCH 166/414] extra logging --- .../iq/dataverse/search/SearchIncludeFragment.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 400f10cc375..c579eb14b7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -343,9 +343,10 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.fine("ATTENTION! query from user: " + query); - logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.fine("ATTENTION! sort by: " + sortField); + logger.info("ATTENTION! query from user: " + query); + logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal); + logger.info("ATTENTION! 
sort by: " + sortField); /** * @todo Number of search results per page should be configurable - @@ -408,6 +409,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); + logger.info("second pass query: " + queryToPassToSolr); + logger.info("second pass filter query: "+filterQueriesFinalSecondPass.toString()); solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); From 552e7350cd7f9d9eb577b056e8d3eb414e8dc3cc Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:09:40 -0500 Subject: [PATCH 167/414] get quota command #8549 --- .../impl/GetCollectionQuotaCommand.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java new file mode 100644 index 00000000000..f07fde9508e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; +import java.io.IOException; +import java.util.List; +import java.util.logging.Logger; + +/** + * + * @author landreev + * The command doesn't do much. It's sole purpose is to check the permissions + * when it's called by the /api/dataverses/.../storage/quota api. 
*/ +@RequiredPermissions(Permission.ManageDataversePermissions) +public class GetCollectionQuotaCommand extends AbstractCommand<Long> { + + private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public Long execute(CommandContext ctxt) throws CommandException { + + if (dataverse != null && dataverse.getStorageQuota() != null) { + return dataverse.getStorageQuota().getAllocation(); + } + + return null; + } +} + + From e4aea93f0ada3212d1116b13cd0b2ae8105100e1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:20:29 -0500 Subject: [PATCH 168/414] extra logging --- .../edu/harvard/iq/dataverse/search/SearchIncludeFragment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index c579eb14b7e..e5b5763efe6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -345,7 +345,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused try { logger.info("ATTENTION! query from user: " + query); logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal); + logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.info("ATTENTION! sort by: " + sortField); From 2b8777990d008b31e61c4338f5b5e964e1f4a20d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:21:17 -0500 Subject: [PATCH 169/414] new classes and instances #8549 --- .../iq/dataverse/storageuse/StorageQuota.java | 118 ++++++++++++++++++ .../iq/dataverse/storageuse/StorageUse.java | 94 ++++++++++++++ .../storageuse/StorageUseServiceBean.java | 65 ++++++++++ 3 files changed, 277 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java new file mode 100644 index 00000000000..68ff6d95d00 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -0,0 +1,118 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToOne; +import java.io.Serializable; +import java.util.logging.Logger; + +//import jakarta.persistence.*; + +/** + * + * @author landreev + * + */ +@Entity +public class StorageQuota implements Serializable { + private static final Logger logger = 
Logger.getLogger(StorageQuota.class.getCanonicalName()); + + /** + * Only Collection quotas are supported, for now + */ + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * For defining quotas for Users and/or Groups + * (Not supported as of yet) + + @Column(nullable = true) + private String assigneeIdentifier; + */ + + /** + * Could be changed to ManyToOne - if we wanted to be able to define separate + * quotas on the same collection for different users. (?) + * Whether we actually want to support the above is TBD. (possibly not) + * Only collection-wide quotas are supported for now. + */ + @OneToOne + @JoinColumn(name="definitionPoint_id", nullable=true) + private DvObject definitionPoint; + + @Column(nullable = true) + private Long allocation; + + public StorageQuota() {} + + /*public String getAssigneeIdentifier() { + return assigneeIdentifier; + } + + public void setAssigneeIdentifier(String assigneeIdentifier) { + this.assigneeIdentifier = assigneeIdentifier; + }*/ + + public DvObject getDefinitionPoint() { + return definitionPoint; + } + + public void setDefinitionPoint(DvObject definitionPoint) { + this.definitionPoint = definitionPoint; + } + + public Long getAllocation() { + return allocation; + } + + public void setAllocation(Long allocation) { + this.allocation = allocation; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageQuota)) { + return false; + } + StorageQuota other = (StorageQuota) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageQuota[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java new file mode 100644 index 00000000000..2633e3e026b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -0,0 +1,94 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GenerationType; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; +import java.io.Serializable; + +/** + * + * @author landreev + */ +@NamedQueries({ + @NamedQuery(name = "StorageUse.findByteSizeByDvContainerId",query = "SELECT su.sizeInBytes FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId "), + @NamedQuery(name = "StorageUse.findByDvContainerId",query = "SELECT su FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId ") +}) +@Entity +public class StorageUse 
implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.AUTO) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + @OneToOne + @JoinColumn(nullable=false) + private DvObject dvObjectContainer; + + @Column + private Long sizeInBytes = null; + + public StorageUse(DvObjectContainer dvObjectContainer, Long sizeInBytes) { + this.dvObjectContainer = dvObjectContainer; + this.sizeInBytes = sizeInBytes; + } + + public Long getSizeInBytes() { + return sizeInBytes; + } + + public void setSizeInBytes(Long sizeInBytes) { + this.sizeInBytes = sizeInBytes; + } + + public void incrementSizeInBytes(Long sizeInBytes) { + this.sizeInBytes += sizeInBytes; + } + + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageUse)) { + return false; + } + StorageUse other = (StorageUse) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageUse[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java new file mode 100644 index 00000000000..fd04344c234 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -0,0 +1,65 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import java.util.logging.Logger; + +/** + * + * @author landreev + */ +@Stateless +@Named +public class StorageUseServiceBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(StorageUseServiceBean.class.getCanonicalName()); + @EJB + DataverseServiceBean dataverseService; + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + public StorageUse findByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByDvContainerId", StorageUse.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + public Long findStorageSizeByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByteSizeByDvContainerId", Long.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + public void incrementStorageSizeHierarchy(DvObjectContainer dvObject, Long filesize) { + incrementStorageSize(dvObject, filesize); + DvObjectContainer parent = dvObject.getOwner(); + while (parent != null) { + incrementStorageSize(parent, filesize); + parent = parent.getOwner(); + } + } + + /** + * Should this be done in a new transaction? 
+ * @param dvObject + * @param filesize + */ + public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { + StorageUse dvContainerSU = findByDvContainerId(dvObject.getId()); + if (dvContainerSU != null) { + // @todo: named query + dvContainerSU.incrementSizeInBytes(filesize); + em.merge(dvContainerSU); + } else { + dvContainerSU = new StorageUse(dvObject, filesize); + em.persist(dvContainerSU); + } + } + +} From 235b1b018a50fd099c983516b046c6847be41e48 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:44:47 -0500 Subject: [PATCH 170/414] A fix for the missing subtree filter query in the 2nd pass search query. #9635 --- .../search/SearchIncludeFragment.java | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index e5b5763efe6..1acd4b0f8a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -282,7 +282,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; SolrQueryResponse solrQueryResponseSecondPass = null; - List filterQueriesFinal = new ArrayList<>(); + List filterQueriesExtended = new ArrayList<>(); if (dataverseAlias != null) { this.dataverse = dataverseService.findByAlias(dataverseAlias); @@ -296,7 +296,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused * @todo centralize this into SearchServiceBean */ if (!isfilterQueryAlreadyInMap(filterDownToSubtree)){ - filterQueriesFinal.add(filterDownToSubtree); + filterQueriesExtended.add(filterDownToSubtree); } // this.dataverseSubtreeContext = dataversePath; } else { @@ -309,7 +309,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } - filterQueriesFinal.addAll(filterQueries); + filterQueriesExtended.addAll(filterQueries); /** * Add type queries, for the types (Dataverses, Datasets, Datafiles) @@ -323,7 +323,9 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused selectedTypesHumanReadable = combine(arr, " OR "); if (!selectedTypesHumanReadable.isEmpty()) { typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } + } + List filterQueriesFinal = new ArrayList<>(); + filterQueriesFinal.addAll(filterQueriesExtended); filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -343,10 +345,10 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.info("ATTENTION! query from user: " + query); - logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); - logger.info("ATTENTION! sort by: " + sortField); + logger.fine"ATTENTION! query from user: " + query); + logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); + logger.fine("ATTENTION! 
sort by: " + sortField); /** * @todo Number of search results per page should be configurable - @@ -399,7 +401,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // run a second search to obtain the numbers of the unselected types: List filterQueriesFinalSecondPass = new ArrayList<>(); - filterQueriesFinalSecondPass.addAll(filterQueries); + filterQueriesFinalSecondPass.addAll(filterQueriesExtended); arr = new String[3 - selectedTypesList.size()]; int c = 0; @@ -409,8 +411,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - logger.info("second pass query: " + queryToPassToSolr); - logger.info("second pass filter query: "+filterQueriesFinalSecondPass.toString()); + logger.fine("second pass query: " + queryToPassToSolr); + logger.fine("second pass filter query: "+filterQueriesFinalSecondPass.toString()); solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); From ceeeaecb9d222c2d2073713cdd839dac2ab4a304 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:47:30 -0500 Subject: [PATCH 171/414] typo. #9635 --- .../edu/harvard/iq/dataverse/search/SearchIncludeFragment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 1acd4b0f8a1..dd9cd78982a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -345,7 +345,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.fine"ATTENTION! query from user: " + query); + logger.fine("ATTENTION! query from user: " + query); logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.fine("ATTENTION! 
sort by: " + sortField); From e4ede35ea8a57afc8830dc63619bed3b660da8ff Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:37:27 -0500 Subject: [PATCH 172/414] #9464 fix logger reference --- .../engine/command/impl/ValidateDatasetJsonCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java index ae1a89c3661..619740ddd89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java @@ -21,7 +21,7 @@ @RequiredPermissions(Permission.AddDataset) public class ValidateDatasetJsonCommand extends AbstractCommand { - private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName()); + private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName()); private final Dataverse dataverse; private final String datasetJson; From d30ecfda14bd4adcafced8486d58507aba12c55f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 25 Oct 2023 10:56:14 -0400 Subject: [PATCH 173/414] add S3 tests, LocalStack, MinIO #6783 Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. See S3AccessIT which executes API (end to end) tests. In addition, a new integration test test class (not an API test, the new kind launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. Note that the format of docker-compose-dev.yml had to change to allow for JVM options to be added. Finally, docs were improved for listing and setting stores via API. --- conf/localstack/buckets.sh | 3 + doc/release-notes/6783-s3-tests.md | 3 + .../source/admin/dataverses-datasets.rst | 4 + docker-compose-dev.yml | 78 +++++- pom.xml | 5 + .../harvard/iq/dataverse/api/S3AccessIT.java | 228 +++++++++++++++--- .../dataaccess/S3AccessIOLocalstackIT.java | 153 ++++++++++++ 7 files changed, 436 insertions(+), 38 deletions(-) create mode 100755 conf/localstack/buckets.sh create mode 100644 doc/release-notes/6783-s3-tests.md create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOLocalstackIT.java diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh new file mode 100755 index 00000000000..fe940d9890d --- /dev/null +++ b/conf/localstack/buckets.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack +awslocal s3 mb s3://mybucket diff --git a/doc/release-notes/6783-s3-tests.md b/doc/release-notes/6783-s3-tests.md new file mode 100644 index 00000000000..1febb87aaed --- /dev/null +++ b/doc/release-notes/6783-s3-tests.md @@ -0,0 +1,3 @@ +Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. See S3AccessIT which executes API (end to end) tests. + +In addition, a new integration test test class (not an API test, the new kind launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. 
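The Testcontainers pattern this commit describes is worth a concrete illustration. The sketch below is not the actual S3AccessIOLocalstackIT, just a minimal, self-contained example of the same approach: spin up LocalStack (same image version as in docker-compose-dev.yml), point an AWS SDK v1 client at it with path-style access, and round-trip an object. The class name, bucket, and assertions are illustrative.

```java
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import org.junit.jupiter.api.Test;
import org.testcontainers.containers.localstack.LocalStackContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import org.testcontainers.utility.DockerImageName;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3;

@Testcontainers
class LocalstackS3SketchIT {

    @Container
    static LocalStackContainer localstack =
            new LocalStackContainer(DockerImageName.parse("localstack/localstack:2.3.2"))
                    .withServices(S3);

    @Test
    void roundTripAnObjectThroughLocalstack() {
        AmazonS3 s3 = AmazonS3ClientBuilder.standard()
                .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(
                        localstack.getEndpointOverride(S3).toString(), localstack.getRegion()))
                .withCredentials(new AWSStaticCredentialsProvider(
                        new BasicAWSCredentials(localstack.getAccessKey(), localstack.getSecretKey())))
                .withPathStyleAccessEnabled(true) // matches path-style-access=true below
                .build();

        s3.createBucket("mybucket");
        s3.putObject("mybucket", "hello.txt", "hello");
        assertEquals("hello", s3.getObjectAsString("mybucket", "hello.txt"));
    }
}
```

Because no running Dataverse is needed, this kind of test belongs in the `mvn verify` integration-test phase rather than the API (end-to-end) suite.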
diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index 170807d3d67..37494c57fa1 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -53,11 +53,15 @@ Configure a Dataverse Collection to Store All New Files in a Specific File Store To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. :: curl -H "X-Dataverse-key: $API_TOKEN" -X PUT -d $storageDriverLabel http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver + +(Note that for ``dataverse.files.store1.label=MyLabel``, you should pass ``MyLabel``.) The current driver can be seen using:: curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver +(Note that for ``dataverse.files.store1.label=MyLabel``, ``store1`` will be returned.) + and can be reset to the default store with:: curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index bb0a4c95b12..769c24fb3a5 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -9,16 +9,38 @@ services: restart: on-failure user: payara environment: - - DATAVERSE_DB_HOST=postgres - - DATAVERSE_DB_PASSWORD=secret - - DATAVERSE_DB_USER=${DATAVERSE_DB_USER} - - ENABLE_JDWP=1 - - DATAVERSE_FEATURE_API_BEARER_AUTH=1 - - DATAVERSE_AUTH_OIDC_ENABLED=1 - - DATAVERSE_AUTH_OIDC_CLIENT_ID=test - - DATAVERSE_AUTH_OIDC_CLIENT_SECRET=94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 - - DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL=http://keycloak.mydomain.com:8090/realms/test - - DATAVERSE_JSF_REFRESH_PERIOD=1 + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: ${DATAVERSE_DB_USER} + ENABLE_JDWP: "1" + DATAVERSE_FEATURE_API_BEARER_AUTH: "1" + DATAVERSE_AUTH_OIDC_ENABLED: "1" + DATAVERSE_AUTH_OIDC_CLIENT_ID: test + DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 + DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test + DATAVERSE_JSF_REFRESH_PERIOD: "1" + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + -Ddataverse.files.localstack1.type=s3 + -Ddataverse.files.localstack1.label=LocalStack + -Ddataverse.files.localstack1.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack1.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack1.bucket-name=mybucket + -Ddataverse.files.localstack1.path-style-access=true + -Ddataverse.files.localstack1.upload-redirect=false + -Ddataverse.files.localstack1.access-key=default + -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.minio1.type=s3 + -Ddataverse.files.minio1.label=MinIO + -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 + -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 + -Ddataverse.files.minio1.bucket-name=mybucket + -Ddataverse.files.minio1.path-style-access=true + -Ddataverse.files.minio1.upload-redirect=false + -Ddataverse.files.minio1.access-key=minioadmin + -Ddataverse.files.minio1.secret-key=minioadmin ports: - "8080:8080" # HTTP 
(Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) @@ -156,6 +178,42 @@ services: networks: - dataverse + dev_localstack: + container_name: "dev_localstack" + hostname: "localstack" + image: localstack/localstack:2.3.2 + restart: on-failure + ports: + - "127.0.0.1:4566:4566" + environment: + - DEBUG=${DEBUG-} + - DOCKER_HOST=unix:///var/run/docker.sock + - HOSTNAME_EXTERNAL=localstack + networks: + - dataverse + volumes: + - ./conf/localstack:/etc/localstack/init/ready.d + tmpfs: + - /localstack:mode=770,size=128M,uid=1000,gid=1000 + + dev_minio: + container_name: "dev_minio" + hostname: "minio" + image: minio/minio + restart: on-failure + ports: + - "9000:9000" + - "9001:9001" + networks: + - dataverse + volumes: + - minio_storage:/data + environment: + # these are the defaults but are here for clarity + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + command: server /data + networks: dataverse: driver: bridge diff --git a/pom.xml b/pom.xml index 4d10073334f..34b0ad2e835 100644 --- a/pom.xml +++ b/pom.xml @@ -612,6 +612,11 @@ 3.0.0 test + + org.testcontainers + localstack + test + From 4ad06ba1af38cf84f5b639a605eecaf95a4fe8b1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:54:20 -0500 Subject: [PATCH 252/414] rename previewshavefailed to previewimagefail #9506 This matches previewimageavailable, also in dvobject. Plus it's clear we aren't talking about shaving. :) --- .../edu/harvard/iq/dataverse/DataFileServiceBean.java | 2 +- .../iq/dataverse/DatasetVersionServiceBean.java | 4 ++-- src/main/java/edu/harvard/iq/dataverse/DvObject.java | 10 +++++----- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 4 ++-- .../migration/V6.0.0.5__9506-track-thumb-failures.sql | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index fae95f12a0c..446c66e5a8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -987,7 +987,7 @@ public boolean isThumbnailAvailable (DataFile file) { this.save(file); return true; } - file.setPreviewsHaveFailed(true); + file.setPreviewImageFail(true); this.save(file); return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index d209f7d9e26..1ee517c9831 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -825,7 +825,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - + "AND o.previewshavefailed = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype LIKE 'image/%' " @@ -859,7 +859,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - + "AND o.previewshavefailed = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype = 'application/pdf' " diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java 
b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 12f0b63b3a1..c6d4a73bfd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -191,14 +191,14 @@ public void setPreviewImageAvailable(boolean status) { * real failure in generating the thumbnail. In both cases, we won't want to try * again every time the preview/thumbnail is requested for a view. */ - private boolean previewsHaveFailed; + private boolean previewImageFail; - public boolean isPreviewsHaveFailed() { - return previewsHaveFailed; + public boolean isPreviewImageFail() { + return previewImageFail; } - public void setPreviewsHaveFailed(boolean previewsHaveFailed) { - this.previewsHaveFailed = previewsHaveFailed; + public void setPreviewImageFail(boolean previewImageFail) { + this.previewImageFail = previewImageFail; } public Timestamp getModificationTime() { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 2c2f49a0444..b1d31f8d44b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2429,7 +2429,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur @DELETE @Path("/clearThumbnailFailureFlag") public Response clearThumbnailFailureFlag() { - em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE").executeUpdate(); + em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); return ok("Thumnail Failure Flags cleared."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index febf659b71a..2de37174a3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -119,9 +119,9 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); + logger.log(Level.FINE, (file.isPreviewImageFail() ? 
"Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: - if (!file.isPreviewsHaveFailed()) { + if (!file.isPreviewImageFail()) { boolean thumbnailGenerated = false; if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { thumbnailGenerated = generateImageThumbnail(storageIO, size); diff --git a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql index 9b12d27db91..156960d2011 100644 --- a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql +++ b/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql @@ -1 +1 @@ -ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewshavefailed BOOLEAN DEFAULT FALSE; \ No newline at end of file +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewimagefail BOOLEAN DEFAULT FALSE; From 7148158dec36576c33c1cbc96143128769dd938a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:56:43 -0500 Subject: [PATCH 253/414] add tests #9506 --- .../java/edu/harvard/iq/dataverse/api/AdminIT.java | 10 ++++++++++ .../java/edu/harvard/iq/dataverse/api/UtilIT.java | 14 +++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 0c5de662e8a..c29c8619d8c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -818,6 +818,16 @@ public void testLoadMetadataBlock_ErrorHandling() { message ); } + @Test + public void testClearThumbnailFailureFlag(){ + Response nonExistentFile = UtilIT.clearThumbnailFailureFlag(Long.MAX_VALUE); + nonExistentFile.prettyPrint(); + nonExistentFile.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + Response clearAllFlags = UtilIT.clearThumbnailFailureFlags(); + clearAllFlags.prettyPrint(); + clearAllFlags.then().assertThat().statusCode(OK.getStatusCode()); + } @Test public void testBannerMessages(){ diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..58edbae18e0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -223,7 +223,19 @@ public static Response validateDataFileHashValue(String fileId, String apiToken .post("/api/admin/validateDataFileHashValue/" + fileId + "?key=" + apiToken); return response; } - + + public static Response clearThumbnailFailureFlags() { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag"); + return response; + } + + public static Response clearThumbnailFailureFlag(long fileId) { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag/" + fileId); + return response; + } + private static String getAuthenticatedUserAsJsonString(String persistentUserId, String firstName, String lastName, String authenticationProviderId, String identifier) { JsonObjectBuilder builder = Json.createObjectBuilder(); builder.add("authenticationProviderId", authenticationProviderId); From 67502ca2326b0536077ad96eb0fe497ca70f37f6 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:58:18 -0500 Subject: [PATCH 254/414] fix typos #9506 --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index b1d31f8d44b..1445db81e4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2430,7 +2430,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur @Path("/clearThumbnailFailureFlag") public Response clearThumbnailFailureFlag() { em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); - return ok("Thumnail Failure Flags cleared."); + return ok("Thumbnail Failure Flags cleared."); } @DELETE @@ -2441,7 +2441,7 @@ public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String file Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); deleteQuery.setParameter(1, df.getId()); deleteQuery.executeUpdate(); - return ok("Thumnail Failure Flag cleared for file id=: " + df.getId() + "."); + return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); } catch (WrappedResponse r) { logger.info("Could not find file with the id: " + fileId); return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); From 82f0bc0eef833388b3e20bf48fe8bb46163640ee Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:59:05 -0500 Subject: [PATCH 255/414] one more rename to previewimagefail #9506 This should have been part of 4ad06ba1a. --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 1445db81e4c..4cb0521d218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2438,7 +2438,7 @@ public Response clearThumbnailFailureFlag() { public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { try { DataFile df = findDataFileOrDie(fileId); - Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE where id = ?"); deleteQuery.setParameter(1, df.getId()); deleteQuery.executeUpdate(); return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); From de2f9a4f6beaad2e34249616dd39748c29e15701 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Nov 2023 16:37:35 -0500 Subject: [PATCH 256/414] popup separate tab for single file download transfer --- .../iq/dataverse/FileDownloadServiceBean.java | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 7a03f1a35dc..ca3f5b4bded 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -20,6 +20,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; @@ -310,13 +312,19 @@ private void redirectToCustomZipDownloadService(String customZipServiceUrl, Stri } } - private void 
redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, Long fileMetadataId) { - String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); - logger.fine("Redirecting to file download url: " + fileDownloadUrl); - try { - FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); - } catch (IOException ex) { - logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, + Long fileMetadataId) { + String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, + fileMetadataId); + if (downloadType.equals("GlobusTransfer")) { + PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl)); + } else { + logger.fine("Redirecting to file download url: " + fileDownloadUrl); + try { + FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); + } catch (IOException ex) { + logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + } } } From c82064ace53bcbf5e8b04a24f916fa333f863c9c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Nov 2023 16:38:17 -0500 Subject: [PATCH 257/414] fix old label in popup required case --- src/main/webapp/file-download-button-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 8ef2af40431..318aab1454e 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -80,7 +80,7 @@ - GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + #{bundle['file.globus.of']} #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} From 2644faee02f7001e51d19e474e3ca5b1b1264302 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 18:03:29 -0500 Subject: [PATCH 258/414] Rearranges the code that updates the Storage Use records to reflect the size of the saved content. #8549 --- .../dataverse/ingest/IngestServiceBean.java | 120 +++++++++++------- 1 file changed, 76 insertions(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 9b3ddd228e9..5efb4c06f48 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -177,7 +177,7 @@ public class IngestServiceBean { // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. - // There is way too much going on in this method. :( + // !! There is way too much going on in this method. :( !! // @todo: Is this method a good candidate for turning into a dedicated Command? public List saveAndAddFilesToDataset(DatasetVersion version, @@ -195,6 +195,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // renamed FOOBAR-1.txt back to FOOBAR.txt... 
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles, fileToReplace); Dataset dataset = version.getDataset(); + long totalBytesSaved = 0L; if (systemConfig.isStorageQuotasEnforced()) { // Check if this dataset is subject to any storage quotas: @@ -205,6 +206,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean unattached = false; boolean savedSuccess = false; if (dataFile.getOwner() == null) { + // is it ever "unattached"? + // do we ever call this method with dataFile.getOwner() != null? + // - we really shouldn't be, either. unattached = true; dataFile.setOwner(dataset); } @@ -230,31 +234,38 @@ public List saveAndAddFilesToDataset(DatasetVersion version, dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); logger.fine("Successfully created a new storageIO object."); - /* - * This commented-out code demonstrates how to copy bytes from a local - * InputStream (or a readChannel) into the writable byte channel of a Dataverse - * DataAccessIO object: + /** + * This commented-out code demonstrates how to copy + * bytes from a local InputStream (or a readChannel) + * into the writable byte channel of a Dataverse + * DataAccessIO object: */ - /* - * storageIO.open(DataAccessOption.WRITE_ACCESS); - * - * writeChannel = storageIO.getWriteChannel(); readChannel = new - * FileInputStream(tempLocationPath.toFile()).getChannel(); - * - * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( - * start < readChannel.size() ) { readChannel.transferTo(start, - * bytesPerIteration, writeChannel); start += bytesPerIteration; } + /** + * storageIO.open(DataAccessOption.WRITE_ACCESS); + * + * writeChannel = storageIO.getWriteChannel(); + * readChannel = new + * FileInputStream(tempLocationPath.toFile()).getChannel(); + * + * long bytesPerIteration = 16 * 1024; // 16K bytes long + * start = 0; + * while ( start < readChannel.size() ) { + * readChannel.transferTo(start, bytesPerIteration, writeChannel); start += bytesPerIteration; + * } */ - /* - * But it's easier to use this convenience method from the DataAccessIO: - * - * (if the underlying storage method for this file is local filesystem, the - * DataAccessIO will simply copy the file using Files.copy, like this: - * - * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), - * StandardCopyOption.REPLACE_EXISTING); + /** + * But it's easier to use this convenience method from + * the DataAccessIO: + * + * (if the underlying storage method for this file is + * local filesystem, the DataAccessIO will simply copy + * the file using Files.copy, like this: + * + * Files.copy(tempLocationPath, + * storageIO.getFileSystemLocation(), + * StandardCopyOption.REPLACE_EXISTING); */ dataAccess.savePath(tempLocationPath); @@ -265,7 +276,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, savedSuccess = true; logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); - // TODO: reformat this file to remove the many tabs added in cc08330 + // TODO: reformat this file to remove the many tabs added in cc08330 - done, I think? extractMetadataNcml(dataFile, tempLocationPath); } catch (IOException ioex) { @@ -375,6 +386,15 @@ public List saveAndAddFilesToDataset(DatasetVersion version, if (savedSuccess) { if (uploadSessionQuota != null) { + // It may be worth considering refreshing the quota here, + // and incrementing the Storage Use record for + // all the parent objects in real time, as + // *each* individual file is being saved. 
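The race described in the comment above (parallel uploads each passing the remaining-quota check before either increments the session total) could in principle be closed with a compare-and-reserve at the database level rather than a per-file quota refresh. A hedged sketch of that idea follows; the "usedbytes" column is hypothetical, since this patch deliberately keeps session usage in the in-memory UploadSessionQuotaLimit rather than in a persistent counter on the quota row.

```java
import jakarta.persistence.EntityManager;

public class QuotaReservationSketch {
    /**
     * Atomically reserve 'bytes' against a quota row. A single conditional
     * UPDATE removes the check-then-increment window in which two parallel
     * uploads can each pass the remaining-quota check.
     *
     * Assumes a hypothetical persistent "usedbytes" column next to the real
     * "allocation" column on storagequota; returns false when the
     * reservation would exceed the allocation.
     */
    public static boolean tryReserve(EntityManager em, long quotaId, long bytes) {
        int updated = em.createNativeQuery(
                "UPDATE storagequota SET usedbytes = usedbytes + ? "
              + "WHERE id = ? AND usedbytes + ? <= allocation")
                .setParameter(1, bytes)
                .setParameter(2, quotaId)
                .setParameter(3, bytes)
                .executeUpdate();
        return updated == 1;
    }
}
```

The main cost of this design over the optimistic session counter is that a failed save would then need a matching release of the reserved bytes.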
I experimented + // with that, but decided against it for performance + // reasons. But yes, there may be some edge case where + // parallel multi-file uploads can end up being able + // to save 2X worth the quota that was available at the + // beginning of each session. if (confirmedFileSize > uploadSessionQuota.getRemainingQuotaInBytes()) { savedSuccess = false; logger.warning("file size over quota limit, skipping"); @@ -382,7 +402,6 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // this (potentially partial) failure to the user. //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(confirmedFileSize), bytesToHumanReadable(storageQuotaLimit))); } else { - // Adjust quota: logger.info("Setting total usage in bytes to " + (uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize)); uploadSessionQuota.setTotalUsageInBytes(uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize); @@ -390,19 +409,12 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } // ... unless we had to reject the file just now because of - // the quota limits, increment the storage use record(s): + // the quota limits, count the number of bytes saved for the + // purposes of incrementing the total storage of the parent + // DvObjectContainers: if (savedSuccess) { - // Update storage use for all the parent dvobjects: - // @todo: Do we want to do this after after *each* file is saved? - there may be - // quite a few files being saved here all at once. We could alternatively - // perform this update only once, after this loop is completed (are there any - // risks/accuracy loss?) - // This update is performed with a direct native query that - // is supposed to be quite fast. But still. - logger.info("Incrementing recorded storage use by " + confirmedFileSize + " bytes for dataset " + dataset.getId()); - // (@todo: need to consider what happens when this code is called on Create?) - storageUseService.incrementStorageSizeRecursively(dataset.getId(), confirmedFileSize); + totalBytesSaved += confirmedFileSize; } } @@ -425,12 +437,14 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean metadataExtracted = false; boolean metadataExtractedFromNetcdf = false; if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) { - /* - * Note that we don't try to ingest the file right away - instead we mark it as - * "scheduled for ingest", then at the end of the save process it will be queued - * for async. ingest in the background. In the meantime, the file will be - * ingested as a regular, non-tabular file, and appear as such to the user, - * until the ingest job is finished with the Ingest Service. + /** + * Note that we don't try to ingest the file right away + * - instead we mark it as "scheduled for ingest", then + * at the end of the save process it will be queued for + * async. ingest in the background. In the meantime, the + * file will be ingested as a regular, non-tabular file, + * and appear as such to the user, until the ingest job + * is finished with the Ingest Service. */ dataFile.SetIngestScheduled(); } else if (fileMetadataExtractable(dataFile)) { @@ -488,6 +502,10 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // dataset.getGlobalId()); // Make sure the file is attached to the dataset and to the version, if this // hasn't been done yet: + // @todo: but shouldn't we be doing the reverse if we haven't been + // able to save the file? 
- disconnect it from the dataset and + // the version?? - L.A. 2023 + // (that said, is there *ever* a case where dataFile.getOwner() != null ?) if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); @@ -503,8 +521,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, DataFileCategory dataFileCategory = dfcIt.next(); if (dataFileCategory.getDataset() == null) { - DataFileCategory newCategory = dataset - .getCategoryByName(dataFileCategory.getName()); + DataFileCategory newCategory = dataset.getCategoryByName(dataFileCategory.getName()); if (newCategory != null) { newCategory.addFileMetadata(dataFile.getFileMetadata()); // dataFileCategory = newCategory; @@ -516,10 +533,25 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } } } + + // Hmm. Noticing that the following two things - adding the + // files to the return list were being + // done outside of this "if (savedSuccess)" block. I'm pretty + // sure that was wrong. - L.A. 11-30-2023 + ret.add(dataFile); + // (unless that is that return value isn't used for anything - ?) } - ret.add(dataFile); } + // Update storage use for all the parent dvobjects: + logger.info("Incrementing recorded storage use by " + totalBytesSaved + " bytes for dataset " + dataset.getId()); + // Q. Need to consider what happens when this code is called on Create? + // A. It works on create as well, yes. (the recursive increment + // query in the method below does need the parent dataset to + // have the database id. But even if these files have been + // uploaded on the Create form, we first save the dataset, and + // then add the files to it. - L.A. + storageUseService.incrementStorageSizeRecursively(dataset.getId(), totalBytesSaved); } return ret; From dc567848bdfcc9647d0779c01bb57f93ab593d89 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 19:10:43 -0500 Subject: [PATCH 259/414] making the set/delete quota commands superuser-only (doh). 
#8549 --- .../impl/DeleteCollectionQuotaCommand.java | 13 ++++++++++++- .../command/impl/SetCollectionQuotaCommand.java | 16 +++++++++++++--- src/main/java/propertyFiles/Bundle.properties | 1 + .../edu/harvard/iq/dataverse/api/FilesIT.java | 3 +++ 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index 5fcbad929a9..bdeb9c6e8cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -6,20 +6,25 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; /** * * @author landreev + * + * A superuser-only command: */ -@RequiredPermissions(Permission.ManageDataversePermissions) +@RequiredPermissions({}) public class DeleteCollectionQuotaCommand extends AbstractVoidCommand { private static final Logger logger = Logger.getLogger(DeleteCollectionQuotaCommand.class.getCanonicalName()); @@ -33,6 +38,12 @@ public DeleteCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) @Override public void executeImpl(CommandContext ctxt) throws CommandException { + // first check if user is a superuser + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, targetDataverse); + } + if (targetDataverse == null) { throw new IllegalCommandException("", this); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index a134cbefdb9..6b0d1bf313a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -13,14 +14,18 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import 
edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; /** * * @author landreev + * + * A superuser-only command: */ -@RequiredPermissions(Permission.ManageDataversePermissions) +@RequiredPermissions({}) public class SetCollectionQuotaCommand extends AbstractVoidCommand { private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); @@ -36,13 +41,18 @@ public SetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target, Lo @Override public void executeImpl(CommandContext ctxt) throws CommandException { + // Check if user is a superuser: + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, dataverse); + } if (dataverse == null) { - throw new IllegalCommandException("", this); + throw new IllegalCommandException("Must specify valid collection", this); } if (allocation == null) { - throw new IllegalCommandException("", this); + throw new IllegalCommandException("Must specify valid allocation in bytes", this); } StorageQuota storageQuota = dataverse.getStorageQuota(); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 748b674a4e1..5033426175c 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -925,6 +925,7 @@ dataverse.storage.quota.allocation=Total quota allocation for this collection: { dataverse.storage.quota.notdefined=No quota defined for this collection dataverse.storage.quota.updated=Storage quota successfully set for the collection dataverse.storage.quota.deleted=Storage quota successfully disabled for the collection +dataverse.storage.quota.superusersonly=Only superusers can change storage quotas. dataverse.storage.use=Total recorded size of the files stored in this collection (user-uploaded files plus the versions in the archival tab-delimited format when applicable): {0} bytes dataverse.datasize.ioerror=Fatal IO error while trying to determine the total size of the files stored in the dataverse. Please report this error to the Dataverse administrator. 
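The superuser guard added in this commit is duplicated verbatim between SetCollectionQuotaCommand and DeleteCollectionQuotaCommand. One possible cleanup, sketched below and not part of the patch, would hoist it into a shared helper (placement on AbstractCommand is an assumption) so the instanceof check and the bundle key live in one place:

```java
// Hedged sketch of a shared guard for superuser-only commands; each
// executeImpl() would call requireSuperuser(target) before doing any work.
protected void requireSuperuser(DvObject target) throws PermissionException {
    if (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser()) {
        throw new PermissionException(
                BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"),
                this, null, target);
    }
}
```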
dataverse.inherited=(inherited from enclosing Dataverse) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index e391e17d8d5..915f82a6de2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -2375,6 +2375,9 @@ public void testCollectionStorageQuotas() { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); From f4eee659021dfaab4dfa9c13e761b7c1875281c5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 19:18:15 -0500 Subject: [PATCH 260/414] removing the license template stubs (#8549) --- .../engine/command/impl/DeleteCollectionQuotaCommand.java | 5 ----- .../engine/command/impl/GetCollectionStorageUseCommand.java | 4 ---- .../engine/command/impl/SetCollectionQuotaCommand.java | 6 ------ .../edu/harvard/iq/dataverse/storageuse/StorageQuota.java | 4 ---- .../edu/harvard/iq/dataverse/storageuse/StorageUse.java | 4 ---- .../iq/dataverse/storageuse/StorageUseServiceBean.java | 4 ---- .../iq/dataverse/storageuse/UploadSessionQuotaLimit.java | 4 ---- 7 files changed, 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index bdeb9c6e8cb..4015228366b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -1,11 +1,6 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java index 40b3128b80d..c30a5a34a81 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index 6b0d1bf313a..cf8fb6fd42e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -1,13 +1,7 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java index 0cfebe4167a..d00f7041e61 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObject; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java index 11a2a8b706c..240fba1037d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObject; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index e92ba43e950..b542a7cd661 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObjectContainer; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java index 06bbe986f70..f7dac52e886 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click 
nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; /** From 538921061604e4daacd864f8ec3865d6d0642561 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 1 Dec 2023 14:21:35 +0000 Subject: [PATCH 261/414] Stash: working on new canDownloadAtLeastOneFile Datasets API endpoint --- .../iq/dataverse/PermissionServiceBean.java | 8 ++++++ .../harvard/iq/dataverse/api/Datasets.java | 14 +++++++++++ .../harvard/iq/dataverse/api/DatasetsIT.java | 25 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 6 +++++ 4 files changed, 53 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index a1de33a764e..9e6628617ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -837,4 +837,12 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } + public boolean canDownloadAtLeastOneFile(User requestUser, DatasetVersion datasetVersion) { + for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { + if (userOn(requestUser, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + return true; + } + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index af6059cf882..a9cfefc33d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4134,4 +4134,18 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc jsonObjectBuilder.add("canDeleteDatasetDraft", permissionService.userOn(requestUser, dataset).has(Permission.DeleteDatasetDraft)); return ok(jsonObjectBuilder); } + + @GET + @AuthRequired + @Path("{id}/versions/{versionId}/canDownloadAtLeastOneFile") + public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); + return ok(permissionService.canDownloadAtLeastOneFile(getRequestUser(crc), datasetVersion)); + }, getRequestUser(crc)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d20f1e8a58b..945b741a94b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -4121,4 +4121,29 @@ public void testGetUserPermissionsOnDataset() { Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getUserPermissionsOnDataset("testInvalidId", apiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } + + @Test + public void testGetCanDownloadAtLeastOneFile() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String 
dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + // Call with valid dataset id + Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, apiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + boolean canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertTrue(canDownloadAtLeastOneFile); + + // Call with invalid dataset id + Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, apiToken); + getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..bf43733788a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3442,6 +3442,12 @@ static Response getUserPermissionsOnDataset(String datasetId, String apiToken) { .get("/api/datasets/" + datasetId + "/userPermissions"); } + static Response getCanDownloadAtLeastOneFile(String datasetId, String versionId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/datasets/" + datasetId + "/versions/" + versionId + "/canDownloadAtLeastOneFile"); + } + static Response createFileEmbargo(Integer datasetId, Integer fileId, String dateAvailable, String apiToken) { JsonObjectBuilder jsonBuilder = Json.createObjectBuilder(); jsonBuilder.add("dateAvailable", dateAvailable); From f48f3a84a72b212d66a4bae1c1056e31dc8f7e52 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 1 Dec 2023 14:50:40 +0000 Subject: [PATCH 262/414] Fixed: DatasetVersionFilesServiceBean order by condition for type criteria --- .../DatasetVersionFilesServiceBean.java | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java index 78fd896c897..99c3c65e3b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java @@ -260,22 +260,27 @@ private Predicate createSearchCriteriaPredicate(DatasetVersion datasetVersion, return criteriaBuilder.and(predicates.toArray(new Predicate[]{})); } - private Order createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, - FileOrderCriteria orderCriteria, - Root fileMetadataRoot) { + private List createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, + FileOrderCriteria orderCriteria, + Root fileMetadataRoot) { Path label = fileMetadataRoot.get("label"); Path dataFile = fileMetadataRoot.get("dataFile"); Path publicationDate = dataFile.get("publicationDate"); Path createDate = dataFile.get("createDate"); Expression orderByLifetimeExpression = criteriaBuilder.selectCase().when(publicationDate.isNotNull(), publicationDate).otherwise(createDate); - return switch (orderCriteria) { - case NameZA -> 
criteriaBuilder.desc(label); - case Newest -> criteriaBuilder.desc(orderByLifetimeExpression); - case Oldest -> criteriaBuilder.asc(orderByLifetimeExpression); - case Size -> criteriaBuilder.asc(dataFile.get("filesize")); - case Type -> criteriaBuilder.asc(dataFile.get("contentType")); - default -> criteriaBuilder.asc(label); - }; + List orderList = new ArrayList<>(); + switch (orderCriteria) { + case NameZA -> orderList.add(criteriaBuilder.desc(label)); + case Newest -> orderList.add(criteriaBuilder.desc(orderByLifetimeExpression)); + case Oldest -> orderList.add(criteriaBuilder.asc(orderByLifetimeExpression)); + case Size -> orderList.add(criteriaBuilder.asc(dataFile.get("filesize"))); + case Type -> { + orderList.add(criteriaBuilder.asc(dataFile.get("contentType"))); + orderList.add(criteriaBuilder.asc(label)); + } + default -> orderList.add(criteriaBuilder.asc(label)); + } + return orderList; } private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { From a29942bf4c8c78d7dee34d61fbb73f44b8ec699e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:08:26 -0500 Subject: [PATCH 263/414] add files not accessible by dataverse flag --- .../dataaccess/AbstractRemoteOverlayAccessIO.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 9de6bf69832..16defc26a4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -48,6 +48,11 @@ public abstract class AbstractRemoteOverlayAccessIO extends static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; protected static final String REMOTE_STORE_NAME = "remote-store-name"; protected static final String REMOTE_STORE_URL = "remote-store-url"; + + // Whether Dataverse can access the file bytes + //Currently True for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; + protected StorageIO baseStore = null; protected String path = null; protected PoolingHttpClientConnectionManager cm = null; @@ -329,6 +334,10 @@ protected String getStoragePath() throws IOException { logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } + + public static boolean isNotDataverseAccessible(String storeId) { + return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(storeId, FILES_NOT_ACCESSIBLE_BY_DATAVERSE)); + } From 0d758398b64521e65c0d0d90d963aeb7b01af42d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:09:03 -0500 Subject: [PATCH 264/414] add Globus store to the normal file upload (as for the remote store) --- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 4a4d3f57f83..a1bcbe49327 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -263,7 +263,8 @@ public static StorageIO createNewStorageIO(T dvObject, S storageIO = new S3AccessIO<>(dvObject, 
null, storageDriverId); break; case REMOTE: - storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + case GLOBUS: + storageIO = createNewStorageIO(dvObject, storageTag, AbstractRemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; break; default: logger.warning("Could not find storage driver for: " + storageTag); From ce8bb6e97ff776777b642ceafb3c1fb7bae6129f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:10:28 -0500 Subject: [PATCH 265/414] add Globus as a download option in file table header requires changes to startGlobusTransfer in separate commit --- src/main/webapp/dataset.xhtml | 2 +- src/main/webapp/filesFragment.xhtml | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 2f76197e508..0b8983a7770 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -230,7 +230,7 @@
[hunk body lost in extraction]
diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml
index fbc48a0e884..3d28e3170f7 100644
--- a/src/main/webapp/filesFragment.xhtml
+++ b/src/main/webapp/filesFragment.xhtml
@@ -436,7 +436,7 @@
[markup lost in extraction; the surviving fragments below show the download controls' rendered tests gaining DatasetPage.isVersionHasGlobus()]
+ and !(DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}"> #{bundle.download}
-
+ and (DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}">
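For context between these two patches: the rendered tests above depend on flags that DatasetPage computes per storage driver (see the next patch), and the commits in this series read such per-driver options through StorageIO.getConfigParamForDriver(). Below is a minimal, self-contained sketch of that lookup pattern (not code from any of these patches), assuming the dataverse.files.<driverId>.<param> property naming visible in the diffs and using plain system properties in place of the MicroProfile Config lookup:

    import java.util.Optional;

    public class DriverConfigSketch {
        // Resolve a per-driver parameter, e.g. dataverse.files.globus.files-not-accessible-by-dataverse
        static Optional<String> getConfigParamForDriver(String driverId, String param) {
            return Optional.ofNullable(System.getProperty("dataverse.files." + driverId + "." + param));
        }

        // Accessible-by-default convention used in these commits:
        // an absent flag parses to false, i.e. Dataverse can read the file bytes.
        static boolean isNotDataverseAccessible(String driverId) {
            return Boolean.parseBoolean(
                    getConfigParamForDriver(driverId, "files-not-accessible-by-dataverse").orElse("false"));
        }

        public static void main(String[] args) {
            System.setProperty("dataverse.files.globus.files-not-accessible-by-dataverse", "true");
            System.out.println(isNotDataverseAccessible("globus")); // true
            System.out.println(isNotDataverseAccessible("file"));   // false
        }
    }

Note that two spellings of this flag appear elsewhere in the series: PATCH 263 keys on files-not-accessible-by-dataverse, while PATCH 271's StorageIO.isDataverseAccessible() keys on dataverse-inaccessible; in both cases the default is that files are accessible.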
    From 8e75a3e2f501b3f0e09fbc9cba9041c52f769737 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:11:56 -0500 Subject: [PATCH 266/414] Add logic for Globus transfer of some files --- .../edu/harvard/iq/dataverse/DatasetPage.java | 112 +++++++++++++----- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index a663b8588ad..0b0d0a2e4f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -11,6 +11,9 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -361,6 +364,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { * other boolean. */ private boolean versionHasTabular = false; + private boolean versionHasGlobus = false; private boolean showIngestSuccess; @@ -2183,10 +2187,19 @@ private String init(boolean initFull) { // the total "originals" size of the dataset with direct custom queries; // then we'll be able to drop the lookup hint for DataTable from the // findDeep() method for the version and further speed up the lookup - // a little bit. + // a little bit. + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (fmd.getDataFile().isTabularData()) { + DataFile df = fmd.getDataFile(); + if (df.isTabularData()) { versionHasTabular = true; + } + if(globusDownloadEnabled) { + if(GlobusAccessibleStore.isGlobusAccessible(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + versionHasGlobus= true; + } + } + if(versionHasTabular &&(!globusDownloadEnabled || versionHasGlobus)) { break; } } @@ -2483,6 +2496,10 @@ private DefaultTreeNode createFileTreeNode(FileMetadata fileMetadata, TreeNode p public boolean isVersionHasTabular() { return versionHasTabular; } + + public boolean isVersionHasGlobus() { + return versionHasGlobus; + } public boolean isReadOnly() { return readOnly; @@ -3089,6 +3106,16 @@ public void setSelectedNonDownloadableFiles(List selectedNonDownlo this.selectedNonDownloadableFiles = selectedNonDownloadableFiles; } + private List selectedGlobusTransferableFiles; + + public List getSelectedGlobusTransferableFiles() { + return selectedGlobusTransferableFiles; + } + + public void setSelectedGlobusTransferableFiles(List selectedGlobusTransferableFiles) { + this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3247,8 +3274,8 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ } } - //if there are two or more files with a total size - //over the zip limit post a "too large" popup + //if there are two or more files, with a total size + //over the zip limit, post a "too large" popup if (bytes > settingsWrapper.getZipDownloadLimit() && selectedDownloadableFiles.size() > 1) { 
setValidateFilesOutcome("FailSize"); return false; @@ -3257,16 +3284,17 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ // If some of the files were restricted and we had to drop them off the // list, and NONE of the files are left on the downloadable list // - we show them a "you're out of luck" popup: - if (getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + if (getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { setValidateFilesOutcome("FailRestricted"); return false; } - if (!getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + if (!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) + && !getSelectedNonDownloadableFiles().isEmpty()) { setValidateFilesOutcome("Mixed"); return true; } - + //ToDo - should Mixed not trigger this? if (isTermsPopupRequired() || isGuestbookPopupRequiredAtDownload()) { setValidateFilesOutcome("GuestbookRequired"); } @@ -3302,12 +3330,25 @@ private boolean filterSelectedFiles(){ setSelectedNonDownloadableFiles(new ArrayList<>()); setSelectedRestrictedFiles(new ArrayList<>()); setSelectedUnrestrictedFiles(new ArrayList<>()); + setSelectedGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : this.selectedFiles){ - if(this.fileDownloadHelper.canDownloadFile(fmd)){ + boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); + + boolean globusTransferable = false; + if(globusDownloadEnabled) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); + } + if(downloadable){ getSelectedDownloadableFiles().add(fmd); someFiles=true; + } else if(globusTransferable) { + getSelectedGlobusTransferableFiles().add(fmd); + someFiles=true; } else { getSelectedNonDownloadableFiles().add(fmd); } @@ -5247,7 +5288,7 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ } return false; } - +/* These appear to be unused - toDo - delete private Boolean downloadButtonAllEnabled = null; public boolean isDownloadAllButtonEnabled() { @@ -5276,7 +5317,7 @@ public boolean isDownloadSelectedButtonEnabled(){ } return false; } - +*/ public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; @@ -6277,28 +6318,37 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer() { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if(fileMetadataForAction!=null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - 
PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if(getSelectedDownloadableFiles()!=null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, getSelectedDownloadableFiles())); + public void startGlobusTransfer(boolean transferAll) { + if(transferAll) { + this.setSelectedFiles(workingVersion.getFileMetadatas()); + } + boolean validated = validateFilesForDownload(true); + if (validated) { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + if (fileMetadataForAction != null) { + List downloadFMList = new ArrayList(1); + downloadFMList.add(fileMetadataForAction); + PrimeFaces.current() + .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); } else { - //ToDo: For non-public, need the subset that are downloadable by the user - //ToDo: For mixed (some in backing store), need the ones in the globus store - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, workingVersion.getFileMetadatas())); + if (getSelectedGlobusTransferableFiles() != null) { + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + getSelectedGlobusTransferableFiles())); + } else { + // ToDo: For non-public, need the subset that are downloadable by the user + // ToDo: For mixed (some in backing store), need the ones in the globus store + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + workingVersion.getFileMetadatas())); + } } } } From 0e91e6ae59020991513add7e14e09c69641ee71e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:12:20 -0500 Subject: [PATCH 267/414] Convenience method to get store id for a file --- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index df0c3e5a019..776d04e98cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1797,5 +1797,11 @@ public static boolean isActivelyEmbargoed(List fmdList) { } return false; } + + + public static String getStorageDriver(DataFile dataFile) { + String storageIdentifier = dataFile.getStorageIdentifier(); + return storageIdentifier.substring(0, storageIdentifier.indexOf(DataAccess.SEPARATOR)); + } } From e5bf3001e39bf8362f9025e85cf3f6626baf15d0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:14:41 -0500 Subject: [PATCH 268/414] skip inaccessible files when doing validatation --- .../command/impl/FinalizeDatasetPublicationCommand.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 3da087addd9..89cfc732455 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
@@ -32,15 +32,13 @@
 import java.util.logging.Logger;
 import edu.harvard.iq.dataverse.GlobalIdServiceBean;
 import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
+import edu.harvard.iq.dataverse.dataaccess.StorageIO;
 import edu.harvard.iq.dataverse.engine.command.Command;
 import edu.harvard.iq.dataverse.util.FileUtil;
 import java.util.ArrayList;
 import java.util.concurrent.Future;
 import org.apache.solr.client.solrj.SolrServerException;
-import jakarta.ejb.EJB;
-import jakarta.inject.Inject;
-
 /**
 *
@@ -350,7 +348,8 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm
        // (the decision was made to validate all the files on every
        // major release; we can revisit the decision if there's any
        // indication that this makes publishing take significantly longer.
-       if (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) {
+       String driverId = FileUtil.getStorageDriver(dataFile);
+       if (StorageIO.isDataverseAccessible(driverId) && (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize)) {
            FileUtil.validateDataFileChecksum(dataFile);
        } else {

From 534c99bb0376aeaa25f2d9d54cbe68a8bfb3b6bc Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Fri, 1 Dec 2023 14:15:23 -0500
Subject: [PATCH 269/414] Convenience method re: store supports globus access

---
 .../iq/dataverse/dataaccess/GlobusAccessibleStore.java | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
index aad1dab5eab..d827e40e807 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
@@ -58,4 +58,11 @@ public static String getGlobusToken(String storeId) {
        return StorageIO.getConfigParamForDriver(storeId, GLOBUS_TOKEN);
    }

+    public static boolean isGlobusAccessible(String storeId) {
+        if (StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE).equals(DataAccess.GLOBUS)) {
+            return true;
+        }
+        return false;
+    }
+
 }

From ca1a4f1267b2d52cd38054cca61fbddf6941522b Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Fri, 1 Dec 2023 14:16:12 -0500
Subject: [PATCH 270/414] Update to use new isNotDataverseAccessible method in getInputStream

---
 .../iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java
index 7ec1e2f9e73..3e72fa85d35 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java
@@ -232,7 +232,9 @@ public long retrieveSizeFromMedia() {

    @Override
    public InputStream getInputStream() throws IOException {
-        if(Boolean.parseBoolean(getConfigParam("endpoint-maps-to-base-store"))) {
+        //Currently only supported when using an S3 store with the Globus S3Connector.
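+        // (That is: when the endpoint's files-not-accessible-by-dataverse flag is false,
+        // the bytes live in the base store (e.g. S3) and can be streamed through it.)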
+ //ToDo: Support when using a managed Globus endpoint that supports http access + if(!AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(endpoint)) { return baseStore.getInputStream(); } else { throw new IOException("Not implemented"); From f39fa0715e81aafefd14c92c50171eb436a45491 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:17:03 -0500 Subject: [PATCH 271/414] Convenience method isDataverseAccessible --- .../edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 14fc9254c59..51cdecf64a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -57,6 +57,8 @@ public abstract class StorageIO { static final String UPLOAD_REDIRECT = "upload-redirect"; static final String UPLOAD_OUT_OF_BAND = "upload-out-of-band"; protected static final String DOWNLOAD_REDIRECT = "download-redirect"; + protected static final String DATAVERSE_INACCESSIBLE = "dataverse-inaccessible"; + public StorageIO() { @@ -620,6 +622,11 @@ public static boolean isDirectUploadEnabled(String driverId) { || Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_OUT_OF_BAND)); } + //True by default, Stores (e.g. RemoteOverlay, Globus) can set this false to stop attempts to read bytes + public static boolean isDataverseAccessible(String driverId) { + return (true && !Boolean.parseBoolean(getConfigParamForDriver(driverId, DATAVERSE_INACCESSIBLE))); + } + // Check that storageIdentifier is consistent with store's config // False will prevent direct uploads static boolean isValidIdentifier(String driverId, String storageId) { From dc4580232dcfe698010cdc4c20fb77c19482484b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:18:05 -0500 Subject: [PATCH 272/414] use correct term (though up and down terms are the same) could also fix for native/http, but not for rsync --- src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index e40f55fedd8..3c6992f8ec3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -941,7 +941,7 @@ public boolean isHTTPDownload() { } public boolean isGlobusDownload() { - return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); + return getMethodAvailable(FileDownloadMethods.GLOBUS.toString(), false); } public boolean isGlobusFileDownload() { From 0bfbb10c355ea1ebc24d2d8bee928c50ca22db41 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 1 Dec 2023 16:59:38 -0500 Subject: [PATCH 273/414] "manage collections" guide entry. 
#8549
---
 .../source/admin/collectionquotas.rst | 17 +++++++++++++++++
 doc/sphinx-guides/source/admin/index.rst | 1 +
 doc/sphinx-guides/source/api/native-api.rst | 12 +++++++++++-
 .../iq/dataverse/storageuse/StorageUse.java | 3 +++
 4 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 doc/sphinx-guides/source/admin/collectionquotas.rst

diff --git a/doc/sphinx-guides/source/admin/collectionquotas.rst b/doc/sphinx-guides/source/admin/collectionquotas.rst
new file mode 100644
index 00000000000..883b6cf0c93
--- /dev/null
+++ b/doc/sphinx-guides/source/admin/collectionquotas.rst
@@ -0,0 +1,17 @@
+Storage Quotas for Collections
+==============================
+
+Please note that this is a new and still experimental feature (as of the Dataverse v6.1 release).
+
+Instance admins can now define storage quota limits for specific collections. These limits can be set, changed, and/or deleted via the provided APIs (please see the :ref:`collection-storage-quotas` section of the :doc:`/api/native-api` guide). The read version of the API is available to the individual collection admins (i.e., a collection owner can check on the quota configured for their collection), but only superusers can set, change, or disable storage quotas.
+
+Storage quotas are *inherited* by subcollections. In other words, when a storage use limit is set for a specific collection, it applies to all the datasets immediately under it and in its sub-collections, unless different quotas are defined there, and so on. Each file added to any dataset in that hierarchy counts toward the quota limit defined for the top collection. A storage quota defined on a child sub-collection overrides whatever quota may be defined on the parent or inherited from an ancestor.
+
+For example, say a collection ``A`` has its storage quota set to 10GB and has 3 sub-collections, ``B``, ``C`` and ``D``. Users can keep uploading files into the datasets anywhere in this hierarchy until the combined size of 10GB is reached between them. However, if an admin has reason to limit one of the sub-collections, ``B``, to 3GB only, that quota can be explicitly set there. This both limits the growth of ``B`` to 3GB and *guarantees* that allocation to it: the contributors to collection ``B`` will be able to keep adding data until the 3GB limit is reached, even after the parent collection ``A`` reaches the combined 10GB limit (at which point ``A`` and all its subcollections except for ``B`` will become read-only).
+
+We do not yet know whether a child collection quota that differs from the quota inherited from a parent will be a popular or needed use case. It is likely that for many instances it will be sufficient to define quotas for collections and have them apply to all the child objects underneath. We will examine the response to this feature and consider adjusting this scheme based on it. We are already considering introducing other types of quotas, such as limits by user or by specific storage volume.
+
+Please note that only the sizes of the main datafiles and of the archival tab-delimited versions produced by the ingest process are counted for the purposes of enforcing the limits. Automatically generated "auxiliary" files, such as rescaled image thumbnails and metadata exports for datasets, are not.
+
+When quotas are set and enforced, users will be informed of the remaining storage allocation on the file upload page, together with other upload and processing limits.
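+For example, a minimal sketch of the scenario described above (the collection aliases and sizes here are hypothetical; the endpoints are the ones documented in the :doc:`/api/native-api` guide):
+
+.. code-block::
+
+  # set a 10GB quota on the parent collection A (superuser API token required)
+  curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/A/storage/quota/10737418240"
+  # explicitly limit (and guarantee) 3GB for sub-collection B
+  curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/B/storage/quota/3221225472"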
+

diff --git a/doc/sphinx-guides/source/admin/index.rst b/doc/sphinx-guides/source/admin/index.rst
index ac81aa737a7..633842044b4 100755
--- a/doc/sphinx-guides/source/admin/index.rst
+++ b/doc/sphinx-guides/source/admin/index.rst
@@ -27,6 +27,7 @@ This guide documents the functionality only available to superusers (such as "da
 solr-search-index
 ip-groups
 mail-groups
+collectionquotas
 monitoring
 reporting-tools-and-queries
 maintenance

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 234d5f37232..7bd334f6a95 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -763,7 +763,8 @@ Collection Storage Quotas

 curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota"

-Will output the storage quota allocated (in bytes), or a message indicating that the quota is not defined for the collection.
+Will output the storage quota allocated (in bytes), or a message indicating that the quota is not defined for the specific collection. The user identified by the API token must have the ``Manage`` permission on the collection.
+

 To set or change the storage allocation quota for a collection:

@@ -771,13 +772,22 @@ To set or change the storage allocation quota for a collection:

 curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota/$SIZE_IN_BYTES"

+This API is superuser-only.
+
+
 To delete a storage quota configured for a collection:

 .. code-block::

 curl -X DELETE -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota"

+This API is superuser-only.
+
+Use the ``/settings`` API to enable or disable the enforcement of storage quotas across the instance via the following setting. For example:
+
+..
code-block:: + curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:UseStorageQuotas Datasets diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java index 240fba1037d..b777736dc8d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -7,10 +7,12 @@ import jakarta.persistence.GenerationType; import jakarta.persistence.GeneratedValue; import jakarta.persistence.Id; +import jakarta.persistence.Index; import jakarta.persistence.JoinColumn; import jakarta.persistence.NamedQueries; import jakarta.persistence.NamedQuery; import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; import java.io.Serializable; /** @@ -23,6 +25,7 @@ @NamedQuery(name = "StorageUse.incrementByteSizeByDvContainerId", query = "UPDATE StorageUse su SET su.sizeInBytes = su.sizeInBytes +:fileSize WHERE su.dvObjectContainer.id =:dvObjectId") }) @Entity +@Table(indexes = {@Index(columnList="dvobjectcontainer_id")}) public class StorageUse implements Serializable { private static final long serialVersionUID = 1L; From 9af23d23d97413338ce2b800697b19970aca3dd5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 17:23:09 -0500 Subject: [PATCH 274/414] add mixed/other dialogs for transfer case --- .../edu/harvard/iq/dataverse/DatasetPage.java | 92 ++++++++++++------- src/main/java/propertyFiles/Bundle.properties | 6 +- src/main/webapp/dataset.xhtml | 48 ++++++++-- src/main/webapp/filesFragment.xhtml | 10 +- 4 files changed, 110 insertions(+), 46 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0b0d0a2e4f5..47a32987b0b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -365,6 +365,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { */ private boolean versionHasTabular = false; private boolean versionHasGlobus = false; + private boolean globusTransferRequested = false; private boolean showIngestSuccess; @@ -3116,6 +3117,16 @@ public void setSelectedGlobusTransferableFiles(List selectedGlobus this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; } + private List selectedNonGlobusTransferableFiles; + + public List getSelectedNonGlobusTransferableFiles() { + return selectedNonGlobusTransferableFiles; + } + + public void setSelectedNonGlobusTransferableFiles(List selectedNonGlobusTransferableFiles) { + this.selectedNonGlobusTransferableFiles = selectedNonGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3227,7 +3238,7 @@ private void startDownload(boolean downloadOriginal){ boolean guestbookRequired = isDownloadPopupRequired(); boolean validate = validateFilesForDownload(downloadOriginal); if (validate) { - updateGuestbookResponse(guestbookRequired, downloadOriginal); + updateGuestbookResponse(guestbookRequired, downloadOriginal, false); if(!guestbookRequired && !getValidateFilesOutcome().equals("Mixed")){ startMultipleFileDownload(); } @@ -3289,8 +3300,9 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ return false; } - if (!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) - && !getSelectedNonDownloadableFiles().isEmpty()) { + //Some are selected and 
there are non-downloadable ones or there are both downloadable and globus transferable files + if ((!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) + && (!getSelectedNonDownloadableFiles().isEmpty()) || (!getSelectedDownloadableFiles().isEmpty() && !getSelectedGlobusTransferableFiles().isEmpty()))) { setValidateFilesOutcome("Mixed"); return true; } @@ -3302,7 +3314,7 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ } - private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal) { + private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal, boolean isGlobusTransfer) { // Note that the GuestbookResponse object may still have information from // the last download action performed by the user. For example, it may // still have the non-null Datafile in it, if the user has just downloaded @@ -3310,7 +3322,11 @@ private void updateGuestbookResponse (boolean guestbookRequired, boolean downloa // even if that's not what they are trying to do now. // So make sure to reset these values: guestbookResponse.setDataFile(null); - guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + if(isGlobusTransfer) { + guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); + } else { + guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + } if (downloadOriginal) { guestbookResponse.setFileFormat("original"); } else { @@ -3331,6 +3347,7 @@ private boolean filterSelectedFiles(){ setSelectedRestrictedFiles(new ArrayList<>()); setSelectedUnrestrictedFiles(new ArrayList<>()); setSelectedGlobusTransferableFiles(new ArrayList<>()); + setSelectedNonGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); @@ -3346,11 +3363,14 @@ private boolean filterSelectedFiles(){ if(downloadable){ getSelectedDownloadableFiles().add(fmd); someFiles=true; - } else if(globusTransferable) { + } else { + getSelectedNonDownloadableFiles().add(fmd); + } + if(globusTransferable) { getSelectedGlobusTransferableFiles().add(fmd); someFiles=true; } else { - getSelectedNonDownloadableFiles().add(fmd); + getSelectedNonGlobusTransferableFiles().add(fmd); } if(fmd.isRestricted()){ getSelectedRestrictedFiles().add(fmd); //might be downloadable to user or not @@ -6318,37 +6338,45 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer(boolean transferAll) { - if(transferAll) { + public boolean isGlobusTransferRequested() { + return globusTransferRequested; + } + + public void startGlobusTransfer(boolean transferAll, boolean popupShown) { + if (transferAll) { this.setSelectedFiles(workingVersion.getFileMetadatas()); } + boolean guestbookRequired = isDownloadPopupRequired(); + boolean validated = validateFilesForDownload(true); if (validated) { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if 
(fileMetadataForAction != null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - PrimeFaces.current() - .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if (getSelectedGlobusTransferableFiles() != null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - getSelectedGlobusTransferableFiles())); + globusTransferRequested = true; + boolean mixed = "Mixed".equals(getValidateFilesOutcome()); + // transfer is + updateGuestbookResponse(guestbookRequired, true, true); + if ((!guestbookRequired && !mixed) || popupShown) { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + if (fileMetadataForAction != null) { + List downloadFMList = new ArrayList(1); + downloadFMList.add(fileMetadataForAction); + PrimeFaces.current() + .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); } else { - // ToDo: For non-public, need the subset that are downloadable by the user - // ToDo: For mixed (some in backing store), need the ones in the globus store - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - workingVersion.getFileMetadatas())); + if (getSelectedGlobusTransferableFiles() != null) { + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + getSelectedGlobusTransferableFiles())); + } } + globusTransferRequested = false; } } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 13e3a675a27..65dd020f27b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -64,6 +64,7 @@ manager=Manager curator=Curator explore=Explore download=Download +transfer=Globus Transfer downloadOriginal=Original Format downloadArchival=Archival Format (.tab) deaccession=Deaccession @@ -1391,6 +1392,7 @@ dataset.accessBtn.header.explore=Explore Options dataset.accessBtn.header.configure=Configure Options dataset.accessBtn.header.compute=Compute Options dataset.accessBtn.download.size=ZIP ({0}) +dataset.accessBtn.transfer.size=({0}) dataset.accessBtn.too.big=The dataset is too large to download. Please select the files you need from the files table. dataset.accessBtn.original.too.big=The dataset is too large to download in the original format. Please select the files you need from the files table. dataset.accessBtn.archival.too.big=The dataset is too large to download in the archival format. Please select the files you need from the files table. @@ -1655,8 +1657,10 @@ dataset.inValidSelectedFilesForDownloadWithEmbargo=Embargoed and/or Restricted F dataset.noValidSelectedFilesForDownload=The selected file(s) may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownload=The restricted file(s) selected may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownloadWithEmbargo=The embargoed and/or restricted file(s) selected may not be downloaded because you have not been granted access. 
-
+dataset.mixedSelectedFilesForTransfer=Some file(s) cannot be transferred. (They are restricted, embargoed, or not Globus accessible.)
+dataset.inValidSelectedFilesForTransfer=Ineligible Files Selected
 dataset.downloadUnrestricted=Click Continue to download the files you have access to download.
+dataset.transferUnrestricted=Click Continue to transfer the eligible files.
 dataset.requestAccessToRestrictedFiles=You may request access to the restricted file(s) by clicking the Request Access button.
 dataset.requestAccessToRestrictedFilesWithEmbargo=Embargoed files cannot be accessed during the embargo period. If your selection contains restricted files, you may request access to them by clicking the Request Access button.

diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml
index 0b8983a7770..e50e68ec162 100644
--- a/src/main/webapp/dataset.xhtml
+++ b/src/main/webapp/dataset.xhtml
@@ -178,7 +178,7 @@
[dataset.xhtml hunks garbled in extraction. Surviving fragments: the access-menu download links now use oncomplete="showPopup(false);" (#{bundle.download}, #{bundle.downloadOriginal}, #{bundle.downloadArchival}); a new entry labeled #{bundle.transfer} is added; and hunk @@ -1095,6 +1100,28 @@ adds the globusTransferMixed dialog (shown by the showPopup script below), listing the affected files (#{resFile.label}) with the #{bundle['dataset.mixedSelectedFilesForTransfer']}, #{bundle['dataset.transferUnrestricted']} and #{bundle['file.deleteDialog.tip']} messages.]
    @@ -1545,6 +1572,7 @@ + @@ -1911,10 +1939,14 @@ $('button[id$="updateOwnerDataverse"]').trigger('click'); } - function showPopup() { + function showPopup(isTransfer) { var outcome = document.getElementById("datasetForm:validateFilesOutcome").value; if (outcome ==='Mixed'){ - PF('downloadMixed').show(); + if(isTransfer) { + PF('globusTransferMixed').show(); + } else { + PF('downloadMixed').show(); + } } if (outcome ==='FailEmpty'){ PF('selectFilesForDownload').show(); diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 3d28e3170f7..58899ab7062 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -442,7 +442,7 @@ disabled="#{false and DatasetPage.lockedFromDownload}" onclick="if (!testFilesSelected()) return false;" action="#{DatasetPage.startDownloadSelectedOriginal()}" - update="@form" oncomplete="showPopup();"> + update="@form" oncomplete="showPopup(false);"> #{bundle.download} @@ -459,7 +459,7 @@
@@ -470,7 +470,7 @@ [hunk body lost in extraction]
@@ -481,9 +481,9 @@ [hunk body lost in extraction; continues below]
  • + actionListener="#{DatasetPage.startGlobusTransfer(false, false)}"> #{bundle['file.globus.transfer']} From 43105d31ae3d5357e450da3a98cac6886e18a1d3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 13:14:28 -0500 Subject: [PATCH 275/414] refactor, handle guestbook at download case --- .../edu/harvard/iq/dataverse/DatasetPage.java | 29 ++----- .../iq/dataverse/FileDownloadHelper.java | 36 ++++---- .../iq/dataverse/GuestbookResponse.java | 2 +- .../dataverse/api/DownloadInstanceWriter.java | 6 +- .../dataverse/globus/GlobusServiceBean.java | 86 ++++++++++++++++--- .../guestbook-terms-popup-fragment.xhtml | 13 ++- 6 files changed, 115 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 47a32987b0b..830e146fa07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3321,7 +3321,11 @@ private void updateGuestbookResponse (boolean guestbookRequired, boolean downloa // a single file; or it may still have the format set to "original" - // even if that's not what they are trying to do now. // So make sure to reset these values: - guestbookResponse.setDataFile(null); + if(fileMetadataForAction == null) { + guestbookResponse.setDataFile(null); + } else { + guestbookResponse.setDataFile(fileMetadataForAction.getDataFile()); + } if(isGlobusTransfer) { guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); } else { @@ -6355,27 +6359,8 @@ public void startGlobusTransfer(boolean transferAll, boolean popupShown) { // transfer is updateGuestbookResponse(guestbookRequired, true, true); if ((!guestbookRequired && !mixed) || popupShown) { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if (fileMetadataForAction != null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - PrimeFaces.current() - .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if (getSelectedGlobusTransferableFiles() != null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - getSelectedGlobusTransferableFiles())); - } - } + boolean doNotSaveGuestbookResponse = workingVersion.isDraft(); + globusService.writeGuestbookAndStartTransfer(guestbookResponse, doNotSaveGuestbookResponse); globusTransferRequested = false; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index a6ae7223d9d..4d8100124ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -9,6 +9,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import 
edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; @@ -53,6 +54,9 @@ public class FileDownloadHelper implements java.io.Serializable { @EJB DataFileServiceBean datafileService; + + @EJB + GlobusServiceBean globusService; private final Map fileDownloadPermissionMap = new HashMap<>(); // { FileMetadata.id : Boolean } @@ -60,32 +64,32 @@ public FileDownloadHelper() { this.filesForRequestAccess = new ArrayList<>(); } - // See also @Size(max = 255) in GuestbookResponse - private boolean testResponseLength(String value) { - return !(value != null && value.length() > 255); - } - // This helper method is called from the Download terms/guestbook/etc. popup, // when the user clicks the "ok" button. We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. - public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse) { + public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse, boolean isGlobusTransfer) { PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); // Note that this method is only ever called from the file-download-popup - // meaning we know for the fact that we DO want to save this // guestbookResponse permanently in the database. - if (guestbookResponse.getSelectedFileIds() != null) { - // this is a batch (multiple file) download. - // Although here's a chance that this is not really a batch download - i.e., - // there may only be one file on the file list. But the fileDownloadService - // method below will check for that, and will redirect to the single download, if - // that's the case. -- L.A. - fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); - } else if (guestbookResponse.getDataFile() != null) { - // this a single file download: - fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + if(isGlobusTransfer) { + globusService.writeGuestbookAndStartTransfer(guestbookResponse, true); + } else { + if (guestbookResponse.getSelectedFileIds() != null) { + // this is a batch (multiple file) download. + // Although here's a chance that this is not really a batch download - i.e., + // there may only be one file on the file list. But the fileDownloadService + // method below will check for that, and will redirect to the single download, + // if + // that's the case. -- L.A. 
+ fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); + } else if (guestbookResponse.getDataFile() != null) { + // this a single file download: + fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 976f1e084ac..9041ccf887c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -99,7 +99,7 @@ public class GuestbookResponse implements Serializable { */ public static final String ACCESS_REQUEST = "AccessRequest"; - static final String DOWNLOAD = "Download"; + public static final String DOWNLOAD = "Download"; static final String SUBSET = "Subset"; static final String EXPLORE = "Explore"; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index cc064976982..bcb8799ec9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -213,9 +213,9 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (di.getConversionParam().equals("format")) { if ("GlobusTransfer".equals(di.getConversionParamValue())) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(dataFile.getFileMetadata()); - redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadFMList); + List downloadDFList = new ArrayList(1); + downloadDFList.add(dataFile); + redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadDFList); } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d8742fc90d5..0c991424ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -50,15 +50,19 @@ import java.util.stream.IntStream; import org.apache.commons.codec.binary.StringUtils; +import org.primefaces.PrimeFaces; import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -73,21 +77,22 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB protected DatasetServiceBean datasetSvc; - @EJB protected SettingsServiceBean settingsSvc; - @Inject DataverseSession session; - @EJB protected AuthenticationServiceBean authSvc; - @EJB EjbDataverseEngine commandEngine; - @EJB UserNotificationServiceBean userNotificationService; + @EJB + PrivateUrlServiceBean privateUrlService; + @EJB 
+ FileDownloadServiceBean fileDownloadService; + @EJB + DataFileServiceBean dataFileService; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -600,7 +605,7 @@ public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List fileMetadataList) { + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; User user = session.getUser(); @@ -629,10 +634,6 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List downloadDFList = new ArrayList(1); + downloadDFList.add(df); + if (!doNotSaveGuestbookResponse) { + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + } + PrimeFaces.current() + .executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + } else { + //Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); + List selectedFiles = new ArrayList(); + for (String idAsString : list) { + try { + Long fileId = Long.parseLong(idAsString); + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + if (!doNotSaveGuestbookResponse) { + df = dataFileService.findCheapAndEasy(fileId); + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + selectedFiles.add(df); + } + } catch (NumberFormatException nfe) { + logger.warning("A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + idAsString); + return; + } + + } + if (!selectedFiles.isEmpty()) { + //Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, + selectedFiles)); + } + } + } } diff --git a/src/main/webapp/guestbook-terms-popup-fragment.xhtml b/src/main/webapp/guestbook-terms-popup-fragment.xhtml index 34df0c79390..5948047d845 100644 --- a/src/main/webapp/guestbook-terms-popup-fragment.xhtml +++ b/src/main/webapp/guestbook-terms-popup-fragment.xhtml @@ -274,8 +274,17 @@ + + + + From a76158f5903ec73a78b284de90d6491a7e05bfce Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 13:35:33 -0500 Subject: [PATCH 276/414] suppress download entry when not accessible, refactor --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../harvard/iq/dataverse/SettingsWrapper.java | 22 +++ .../file-download-button-fragment.xhtml | 6 +- .../dataaccess/GlobusOverlayAccessIOTest.java | 176 ++++++++++++++++++ 4 files changed, 202 insertions(+), 4 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 830e146fa07..704c1d42228 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3354,7 +3354,7 @@ private boolean filterSelectedFiles(){ setSelectedNonGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; - boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); + boolean 
globusDownloadEnabled = settingsWrapper.isGlobusDownload(); for (FileMetadata fmd : this.selectedFiles){ boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 8b7f732d03f..8ab1e87aef2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -6,6 +6,8 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; @@ -337,6 +339,26 @@ public boolean isGlobusEnabledStorageDriver(String driverId) { return (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId)); } + public boolean isDownloadable(FileMetadata fmd) { + boolean downloadable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); + } + return downloadable; + } + + public boolean isGlobusTransferable(FileMetadata fmd) { + boolean globusTransferable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + } + return globusTransferable; + } + + public String getGlobusAppUrl() { if (globusAppUrl == null) { globusAppUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 318aab1454e..9c29fd777a1 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -60,7 +60,7 @@ -
[file-download-button-fragment.xhtml hunk body and the start of the new GlobusOverlayAccessIOTest.java lost in extraction; the test code resumes below]
  • gsio = new GlobusOverlayAccessIO(datafile, null, "globus"); + System.out.println("Size2 is " + gsio.retrieveSizeFromMedia()); + + System.out.println( + "NotValid: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println( + "ValidRemote: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//of/the/hill")); + System.setProperty("dataverse.files.globus.managed", "true"); + datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); + System.out.println("ValidLocal: " + + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); + + // We can read the storageIdentifier and get the driver + assertTrue(datafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("globus").equals(System.getProperty("dataverse.files.globus.type"))); + // When we get a StorageIO for the file, it is the right type + StorageIO storageIO = DataAccess.getStorageIO(localDatafile); + assertTrue(storageIO instanceof GlobusOverlayAccessIO); + // When we use it, we can get properties like the remote store name + GlobusOverlayAccessIO globusIO = (GlobusOverlayAccessIO) storageIO; + assertTrue( + globusIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.globus.remote-store-name"))); + + String location = globusIO.getStorageLocation(); + assertEquals("globus:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + baseStoreId, location); +/* + // TBD: + // And can get a temporary download URL for the main file + String signedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); + System.out.println(signedURL); + // And the URL starts with the right stuff + assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.globus." 
+ GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH) + "/" + logoPath)); + // And the signature is valid + // assertTrue( + // UrlSignerUtil.isValidUrl(signedURL, null, null, + // System.getProperty("dataverse.files.globus.secret-key"))); + // And we get an unsigned URL with the right stuff with no key + System.clearProperty("dataverse.files.globus.secret-key"); + String unsignedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); + assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.globus.base-url") + "/" + logoPath)); +*/ + // Once we've opened, we can get the file size (only works if the call to Globus + // works) + globusIO.open(DataAccessOption.READ_ACCESS); + assertTrue(globusIO.getSize() > 0); + // If we ask for the path for an aux file, it is correct + System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, + identifier, baseStoreId + ".auxobject").toString()); + System.out.println(globusIO.getAuxObjectAsPath("auxobject").toString()); + assertTrue(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, identifier, + baseStoreId + ".auxobject").equals(globusIO.getAuxObjectAsPath("auxobject"))); + IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(localDatafile), + "Expected getStorageIO() to throw, but it didn't"); + // 'test' is the driverId in the IOException messages + assertTrue(thrown.getMessage().contains("globus")); + + } + + @Test + void testRemoteOverlayIdentifierFormats() throws IOException { + System.clearProperty("dataverse.files.globus.managed"); + datafile.setStorageIdentifier( + "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); + assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); + assertFalse( + DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("globus", "bad"))); + assertFalse(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); + System.setProperty("dataverse.files.globus.managed", "true"); + assertTrue(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); + + } + +} From 93a586727a3c00069699eb47e5ca5ca3ebbf91cf Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 17:58:45 -0500 Subject: [PATCH 277/414] remove old testing code --- .../dataaccess/GlobusOverlayAccessIO.java | 46 ----- .../dataaccess/GlobusOverlayAccessIOTest.java | 176 ------------------ 2 files changed, 222 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 3e72fa85d35..e825af8cf30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -408,52 +408,6 @@ private static String[] getAllowedEndpoints(String driverId) throws IOException } - public static void main(String[] args) { - System.out.println("Running the main method"); - if (args.length > 0) { - System.out.printf("List of arguments: {}", Arrays.toString(args)); - } - System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); - System.out.println("NotValid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.out.println("ValidRemote: " + isValidIdentifier("globus", "globus://localid//of/the/hill")); - 
System.setProperty("dataverse.files.globus.managed", "true"); - - System.out.println("ValidLocal: " + isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); - System.setProperty("dataverse.files.globus.globus-token", - ""); - System.setProperty("dataverse.files.globus.base-store", "file"); - System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - System.setProperty("dataverse.files.file.directory", "/tmp/files"); - // logger.info(JvmSettings.BASE_URL.lookup("globus")); - // logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); - - try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( - "globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.retrieveSizeFromMedia()); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - try { - DataFile df = new DataFile(); - Dataset ds = new Dataset(); - ds.setAuthority("10.5072"); - ds.setIdentifier("FK21234"); - df.setOwner(ds); - df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); - logger.info("Size2 is " + gsio.retrieveSizeFromMedia()); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - - @Override public void open(DataAccessOption... option) throws IOException { // TODO Auto-generated method stub diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index bf3bcdbfe8e..e69de29bb2d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -1,176 +0,0 @@ -/* - * Copyright 2018 Forschungszentrum Jülich GmbH - * SPDX-License-Identifier: Apache 2.0 - */ -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DOIServiceBean; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.mocks.MocksFactory; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.util.UrlSignerUtil; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import static org.junit.jupiter.api.Assertions.*; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.mockito.junit.jupiter.MockitoSettings; -import org.mockito.quality.Strictness; -import java.io.IOException; -import java.nio.file.Paths; - -@ExtendWith(MockitoExtension.class) -@MockitoSettings(strictness = Strictness.STRICT_STUBS) -public class GlobusOverlayAccessIOTest { - - @Mock - - private Dataset dataset; - private DataFile datafile; - private DataFile localDatafile; - private String baseStoreId = "182ad2bda2f-c3508e719076"; - private String logoPath = "image002.mrc"; - private String authority = "10.5072"; - private String identifier = "F2ABCDEF"; - - @BeforeEach - public void setUp() { - System.setProperty("dataverse.files.globus." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, - "d8c42580-6528-4605-9ad8-116a61982644/hdc1"); - System.setProperty("dataverse.files.globus." 
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, - "d8c42580-6528-4605-9ad8-116a61982644/hdc1"); - - System.setProperty("dataverse.files.globus.globus-token", - "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); - System.setProperty("dataverse.files.globus.remote-store-name", "GlobusEndpoint1"); - System.setProperty("dataverse.files.globus.type", "globus"); - - System.setProperty("dataverse.files.globus.managed", "true"); - - System.setProperty("dataverse.files.globus.base-store", "file"); - System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - System.setProperty("dataverse.files.file.directory", "/tmp/files"); - - // System.setProperty("dataverse.files.test.type", "remote"); - System.setProperty("dataverse.files.globus.label", "globusTest"); - System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org/resources"); - System.setProperty("dataverse.files.test.base-store", "file"); - System.setProperty("dataverse.files.test.download-redirect", "true"); - System.setProperty("dataverse.files.test.remote-store-name", "DemoDataCorp"); - System.setProperty("dataverse.files.globus.secret-key", "12345"); // Real keys should be much longer, more - // random - System.setProperty("dataverse.files.file.type", "file"); - System.setProperty("dataverse.files.file.label", "default"); - datafile = MocksFactory.makeDataFile(); - dataset = MocksFactory.makeDataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", - DOIServiceBean.DOI_RESOLVER_URL, null)); - datafile.setOwner(dataset); - datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); - - localDatafile = MocksFactory.makeDataFile(); - localDatafile.setOwner(dataset); - localDatafile.setStorageIdentifier("globus://" + baseStoreId); - } - - @AfterEach - public void tearDown() { - System.clearProperty("dataverse.files.test.type"); - System.clearProperty("dataverse.files.test.label"); - System.clearProperty("dataverse.files.test.base-url"); - System.clearProperty("dataverse.files.test.base-store"); - System.clearProperty("dataverse.files.test.download-redirect"); - System.clearProperty("dataverse.files.test.label"); - System.clearProperty("dataverse.files.test.remote-store-name"); - System.clearProperty("dataverse.files.test.secret-key"); - System.clearProperty("dataverse.files.file.type"); - System.clearProperty("dataverse.files.file.label"); - } - - @Test - void testGlobusOverlayFiles() throws IOException { - System.clearProperty("dataverse.files.globus.managed"); - datafile.setStorageIdentifier( - "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(datafile, null, "globus"); - System.out.println("Size2 is " + gsio.retrieveSizeFromMedia()); - - System.out.println( - "NotValid: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.out.println( - "ValidRemote: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//of/the/hill")); - System.setProperty("dataverse.files.globus.managed", "true"); - datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); - System.out.println("ValidLocal: " - + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); - - // We can read the storageIdentifier and get the driver - 
assertTrue(datafile.getStorageIdentifier() - .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); - // We can get the driver type from it's ID - assertTrue(DataAccess.getDriverType("globus").equals(System.getProperty("dataverse.files.globus.type"))); - // When we get a StorageIO for the file, it is the right type - StorageIO storageIO = DataAccess.getStorageIO(localDatafile); - assertTrue(storageIO instanceof GlobusOverlayAccessIO); - // When we use it, we can get properties like the remote store name - GlobusOverlayAccessIO globusIO = (GlobusOverlayAccessIO) storageIO; - assertTrue( - globusIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.globus.remote-store-name"))); - - String location = globusIO.getStorageLocation(); - assertEquals("globus:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + baseStoreId, location); -/* - // TBD: - // And can get a temporary download URL for the main file - String signedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); - System.out.println(signedURL); - // And the URL starts with the right stuff - assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.globus." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH) + "/" + logoPath)); - // And the signature is valid - // assertTrue( - // UrlSignerUtil.isValidUrl(signedURL, null, null, - // System.getProperty("dataverse.files.globus.secret-key"))); - // And we get an unsigned URL with the right stuff with no key - System.clearProperty("dataverse.files.globus.secret-key"); - String unsignedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); - assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.globus.base-url") + "/" + logoPath)); -*/ - // Once we've opened, we can get the file size (only works if the call to Globus - // works) - globusIO.open(DataAccessOption.READ_ACCESS); - assertTrue(globusIO.getSize() > 0); - // If we ask for the path for an aux file, it is correct - System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, - identifier, baseStoreId + ".auxobject").toString()); - System.out.println(globusIO.getAuxObjectAsPath("auxobject").toString()); - assertTrue(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, identifier, - baseStoreId + ".auxobject").equals(globusIO.getAuxObjectAsPath("auxobject"))); - IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(localDatafile), - "Expected getStorageIO() to throw, but it didn't"); - // 'test' is the driverId in the IOException messages - assertTrue(thrown.getMessage().contains("globus")); - - } - - @Test - void testRemoteOverlayIdentifierFormats() throws IOException { - System.clearProperty("dataverse.files.globus.managed"); - datafile.setStorageIdentifier( - "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); - assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); - assertFalse( - DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("globus", "bad"))); - assertFalse(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); - System.setProperty("dataverse.files.globus.managed", "true"); - assertTrue(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); - - } - -} From 1a96c566bccdf32aefeaca89898a3746b146fa08 Mon Sep 17 00:00:00 2001 From: 
Leonid Andreev Date: Sun, 3 Dec 2023 18:57:59 -0500 Subject: [PATCH 278/414] the kill switch for the real-time storageuse updates (just in case) and some related documentation (#8549) --- .../source/admin/collectionquotas.rst | 2 + .../source/installation/config.rst | 5 ++ .../dataverse/ingest/IngestServiceBean.java | 2 +- .../iq/dataverse/settings/JvmSettings.java | 3 + .../storageuse/StorageUseServiceBean.java | 58 ++++++------------- 5 files changed, 30 insertions(+), 40 deletions(-) diff --git a/doc/sphinx-guides/source/admin/collectionquotas.rst b/doc/sphinx-guides/source/admin/collectionquotas.rst index 883b6cf0c93..2ce3132e2ba 100644 --- a/doc/sphinx-guides/source/admin/collectionquotas.rst +++ b/doc/sphinx-guides/source/admin/collectionquotas.rst @@ -1,3 +1,4 @@ + Storage Quotas for Collections ============================== @@ -15,3 +16,4 @@ Please note that only the sizes of the main datafiles and the archival tab-delim When quotas are set and enforced, the users will be informed of the remaining storage allocation on the file upload page together with other upload and processing limits. +Part of the new and experimental nature of this feature is that we don't know for a fact yet how well it will function in real life on a very busy production system, despite our best efforts to test it prior to the release. One specific issue is having to update the recorded storage use for every parent collection of the given dataset whenever new files are added. This includes updating the combined size of the root, top-level collection - which will need to be updated after *every* file upload. In the unlikely case that this starts causing problems with race conditions and database update conflicts, it is possible to disable these updates (and thus disable the storage quotas feature) by setting the :ref:`dataverse.storageuse.disable-storageuse-increments` JVM setting to true. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 52ba35376ac..03eeff9dbb6 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2489,6 +2489,11 @@ This setting was added to keep S3 direct upload lightweight. When that feature i See also :ref:`s3-direct-upload-features-disabled`. +dataverse.storageuse.disable-storageuse-increments +++++++++++++++++++++++++++++++++++++++++++++++++++ + +This setting serves the role of an emergency "kill switch" that will disable maintaining the real-time record of storage use for all the datasets and collections in the database. Because of the experimental nature of this feature (see :doc:`/admin/collectionquotas`), which hasn't been used in a production setting as of this release (v6.1), this setting is provided in case these updates start causing database race conditions and conflicts on a busy server. + dataverse.auth.oidc.* +++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5efb4c06f48..233f746fb17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -206,7 +206,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean unattached = false; boolean savedSuccess = false; if (dataFile.getOwner() == null) { - // is it ever "unattached"? + // is it ever "attached"? 
// do we ever call this method with dataFile.getOwner() != null? // - we really shouldn't be, either. unattached = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..7c65bba77d5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -150,6 +150,9 @@ public enum JvmSettings { SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + // STORAGE USE SETTINGS + SCOPE_STORAGEUSE(PREFIX, "storageuse"), + STORAGEUSE_DISABLE_UPDATES(SCOPE_STORAGEUSE, "disable-storageuse-increments"), ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index b542a7cd661..18e4ef49640 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,12 +1,14 @@ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.settings.JvmSettings; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import java.util.Optional; import java.util.logging.Logger; /** @@ -37,31 +39,6 @@ public Long findStorageSizeByDvContainerId(Long dvObjectId) { return res == null ? 0L : res; } - public void incrementStorageSizeHierarchy(DvObjectContainer dvObject, Long filesize) { - incrementStorageSize(dvObject, filesize); - DvObjectContainer parent = dvObject.getOwner(); - while (parent != null) { - incrementStorageSize(parent, filesize); - parent = parent.getOwner(); - } - } - - /** - * @param dvObject - * @param filesize - */ - public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { - StorageUse dvContainerSU = findByDvContainerId(dvObject.getId()); - if (dvContainerSU != null) { - // @todo: named query - dvContainerSU.incrementSizeInBytes(filesize); - em.merge(dvContainerSU); - } else { - dvContainerSU = new StorageUse(dvObject, filesize); - em.persist(dvContainerSU); - } - } - /** * Increments the recorded storage size for all the dvobject parents of a * datafile, recursively. 
@@ -71,20 +48,23 @@ public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { //@todo should throw exceptions if either parameter is null - String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" - + "(" - + " SELECT id, owner_id\n" - + " FROM dvobject\n" - + " WHERE id=" + dvObjectContainerId + "\n" - + " UNION ALL\n" - + " SELECT dvobject.id, dvobject.owner_id\n" - + " FROM dvobject\n" - + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" - + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" - + "FROM uptree\n" - + "WHERE dvobjectcontainer_id = uptree.id;"; - - int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; + + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } // @todo throw an exception if the number of parent dvobjects updated by // the query is < 2 - ? } From 0a536da0c42ed9654641985f1fd8dc20b461c16c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 4 Dec 2023 09:46:59 -0500 Subject: [PATCH 279/414] a missing ref in the doc. #8549 --- doc/sphinx-guides/source/installation/config.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 03eeff9dbb6..7cb321708a7 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2489,6 +2489,8 @@ This setting was added to keep S3 direct upload lightweight. When that feature i See also :ref:`s3-direct-upload-features-disabled`. +.. _dataverse.storageuse.disable-storageuse-increments: + dataverse.storageuse.disable-storageuse-increments ++++++++++++++++++++++++++++++++++++++++++++++++++ From b20f198368615d7d8c4e798a25d6f68a6d0c4ed9 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 4 Dec 2023 11:27:27 -0500 Subject: [PATCH 280/414] Bump version to 6.1 --- doc/sphinx-guides/source/conf.py | 4 ++-- doc/sphinx-guides/source/versions.rst | 3 ++- modules/dataverse-parent/pom.xml | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 0660ec3b071..64efc359e9a 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '6.0' +version = '6.1' # The full version, including alpha/beta/rc tags. -release = '6.0' +release = '6.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
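The `dataverse.storageuse.disable-storageuse-increments` kill switch documented above is an ordinary JVM option. A minimal sketch of toggling it on a Payara-based installation, assuming the standard `asadmin` mechanism used for other Dataverse JVM settings (a restart of the application server is typically needed for the change to take effect):

```shell
# Disable the real-time storage-use updates (this also disables the storage quotas feature)
./asadmin create-jvm-options "-Ddataverse.storageuse.disable-storageuse-increments=true"

# To re-enable the updates later, remove the option again and restart
./asadmin delete-jvm-options "-Ddataverse.storageuse.disable-storageuse-increments=true"
```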
diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 2000a2097f0..2cf7f46dc5e 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -7,7 +7,8 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. - pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!) ` -- 6.0 +- 6.1 +- `6.0 `__ - `5.14 `__ - `5.13 `__ - `5.12.1 `__ diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index db0fa46a952..7b305cad581 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -131,7 +131,7 @@ - 6.0 + 6.1 17 UTF-8 From 5f29144762c166c7856958497e24f629d53c92a0 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 4 Dec 2023 12:58:01 -0500 Subject: [PATCH 281/414] adding 6.1 release notes and removing .md files --- ...001-datasets-files-api-user-permissions.md | 13 -- doc/release-notes/10060-api-changelog.md | 3 - .../10093-signedUrl_improvements.md | 5 - .../10104-dataset-citation-deaccessioned.md | 1 - doc/release-notes/6.1-release-notes.md | 195 ++++++++++++++++++ .../9268-8349-oidc-improvements.md | 43 ---- doc/release-notes/9412-markdown-previewer.md | 1 - doc/release-notes/9428-alternative-title.md | 9 - doc/release-notes/9589-ds-configure-tool.md | 1 - doc/release-notes/9590-intellij-redeploy.md | 3 - .../9599-guestbook-at-request.md | 2 - doc/release-notes/9635-solr-improvements.md | 4 - doc/release-notes/9692-files-api-extension.md | 7 - .../9714-files-api-extension-filters.md | 14 -- .../9763-versions-api-improvements.md | 8 - .../9785-files-api-extension-search-text.md | 3 - .../9834-files-api-extension-counts.md | 6 - ...oad-extension-new-file-access-endpoints.md | 14 -- .../9852-files-api-extension-deaccession.md | 12 -- .../9880-info-api-zip-limit-embargo.md | 5 - .../9907-files-api-counts-with-criteria.md | 11 - doc/release-notes/9955-Signposting-updates.md | 7 - ...et-api-downloadsize-ignore-tabular-size.md | 9 - .../9972-files-api-filter-by-tabular-tags.md | 3 - ...with-criteria-and-deaccessioned-support.md | 12 -- 25 files changed, 195 insertions(+), 196 deletions(-) delete mode 100644 doc/release-notes/10001-datasets-files-api-user-permissions.md delete mode 100644 doc/release-notes/10060-api-changelog.md delete mode 100644 doc/release-notes/10093-signedUrl_improvements.md delete mode 100644 doc/release-notes/10104-dataset-citation-deaccessioned.md create mode 100644 doc/release-notes/6.1-release-notes.md delete mode 100644 doc/release-notes/9268-8349-oidc-improvements.md delete mode 100644 doc/release-notes/9412-markdown-previewer.md delete mode 100644 doc/release-notes/9428-alternative-title.md delete mode 100644 doc/release-notes/9589-ds-configure-tool.md delete mode 100644 doc/release-notes/9590-intellij-redeploy.md delete mode 100644 doc/release-notes/9599-guestbook-at-request.md delete mode 100644 doc/release-notes/9635-solr-improvements.md delete mode 100644 doc/release-notes/9692-files-api-extension.md delete mode 100644 doc/release-notes/9714-files-api-extension-filters.md delete mode 100644 doc/release-notes/9763-versions-api-improvements.md delete mode 100644 doc/release-notes/9785-files-api-extension-search-text.md delete mode 
100644 doc/release-notes/9834-files-api-extension-counts.md delete mode 100644 doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md delete mode 100644 doc/release-notes/9852-files-api-extension-deaccession.md delete mode 100644 doc/release-notes/9880-info-api-zip-limit-embargo.md delete mode 100644 doc/release-notes/9907-files-api-counts-with-criteria.md delete mode 100644 doc/release-notes/9955-Signposting-updates.md delete mode 100644 doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md delete mode 100644 doc/release-notes/9972-files-api-filter-by-tabular-tags.md delete mode 100644 doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md diff --git a/doc/release-notes/10001-datasets-files-api-user-permissions.md b/doc/release-notes/10001-datasets-files-api-user-permissions.md deleted file mode 100644 index 0aa75f9218a..00000000000 --- a/doc/release-notes/10001-datasets-files-api-user-permissions.md +++ /dev/null @@ -1,13 +0,0 @@ -- New query parameter `includeDeaccessioned` added to the getVersion endpoint (/api/datasets/{id}/versions/{versionId}) to consider deaccessioned versions when searching for versions. - - -- New endpoint to get user permissions on a dataset (/api/datasets/{id}/userPermissions). In particular, the user permissions that this API call checks, returned as booleans, are the following: - - - Can view the unpublished dataset - - Can edit the dataset - - Can publish the dataset - - Can manage the dataset permissions - - Can delete the dataset draft - - -- New permission check "canManageFilePermissions" added to the existing endpoint for getting user permissions on a file (/api/access/datafile/{id}/userPermissions). \ No newline at end of file diff --git a/doc/release-notes/10060-api-changelog.md b/doc/release-notes/10060-api-changelog.md deleted file mode 100644 index 56ac96e3564..00000000000 --- a/doc/release-notes/10060-api-changelog.md +++ /dev/null @@ -1,3 +0,0 @@ -We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html - -See also #10060. diff --git a/doc/release-notes/10093-signedUrl_improvements.md b/doc/release-notes/10093-signedUrl_improvements.md deleted file mode 100644 index 26a17c65e3f..00000000000 --- a/doc/release-notes/10093-signedUrl_improvements.md +++ /dev/null @@ -1,5 +0,0 @@ -A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - -SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - -Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
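As a hedged illustration of the signedUrls workflow mentioned above: a superuser can ask Dataverse to sign a URL on behalf of a user. The sketch below assumes the admin `requestSignedUrl` endpoint and the payload field names shown, which may differ in your version; the server, token, user, and DOI values are placeholders:

```shell
# Hypothetical request for a signed URL on behalf of user "dataverseAdmin"; adjust names to your installation
export SERVER_URL=http://localhost:8080
export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx

curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-Type:application/json" -X POST \
  -d "{\"url\":\"$SERVER_URL/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF\",\"timeOut\":10,\"user\":\"dataverseAdmin\"}" \
  "$SERVER_URL/api/admin/requestSignedUrl"
```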
diff --git a/doc/release-notes/10104-dataset-citation-deaccessioned.md b/doc/release-notes/10104-dataset-citation-deaccessioned.md deleted file mode 100644 index 0ba06d729c4..00000000000 --- a/doc/release-notes/10104-dataset-citation-deaccessioned.md +++ /dev/null @@ -1 +0,0 @@ -The getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md new file mode 100644 index 00000000000..c2b52ab34b8 --- /dev/null +++ b/doc/release-notes/6.1-release-notes.md @@ -0,0 +1,195 @@ +# Dataverse 6.1 + +(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) + +This release brings new features, enhancements, and bug fixes to the Dataverse software. +Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +## Release Highlights (Major Upgrades, Breaking Changes) + +This release contains major upgrades to core components. Detailed upgrade instructions can be found below. + +## Detailed Release Highlights, New Features and Use Case Scenarios + +### Dataverse installations can now be configured to allow out-of-band upload +- Installations can now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. + +### Alternative Title is made repeatable +- One will need to update the database with the updated citation block: + `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` +- One will also need to update the Solr schema: + Change the "alternativeTitle" field to multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` + Reload the Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` + +Since Alternative Title is now repeatable, old JSON APIs will not be compatible with the new version, since the value of alternative title has changed from a simple string to an array. 
+For example, instead of "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] + +### Improvements in the /versions API +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. + +### The following API endpoints have been added: + +- /api/files/{id}/downloadCount +- /api/files/{id}/dataTables +- /api/files/{id}/metadata/tabularTags: New endpoint to set tabular file tags. +- canManageFilePermissions (/access/datafile/{id}/userPermissions): Added for getting user permissions on a file. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). +- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. +- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. +- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Reports whether a particular file that existed in a previous version of the dataset no longer exists in the latest version. +- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through the API (given a dataset and a version). +- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. +- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. + +### Extended the existing endpoints: +- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. 
Added support for filtering by optional criteria query parameters: + - contentType + - accessStatus + - categoryName + - tabularTagName + - searchText +- getDownloadSize (/api/datasets/{identifier}/versions/{versionId}/downloadsize): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. Added a new optional query parameter "mode". +This parameter applies a filter criterion to the operation and supports the following values: + - All (Default): Includes both archival and original sizes for tabular files + - Archival: Includes only the archival size for tabular files + - Original: Includes only the original size for tabular files. +- /api/datasets/{id}/versions/{versionId} New query parameter `includeDeaccessioned` added to consider deaccessioned versions when searching for versions. +- /api/datasets/{id}/userPermissions Gets user permissions on a dataset. In particular, the user permissions that this API call checks, returned as booleans, are the following: + - Can view the unpublished dataset + - Can edit the dataset + - Can publish the dataset + - Can manage the dataset permissions + - Can delete the dataset draft +- getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. + + +### DataFile API payload has been extended to include the following fields: +- tabularData: Boolean field indicating whether the DataFile is of tabular type +- fileAccessRequest: Boolean field indicating whether file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". + + +### Misc +- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. + +- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + +- Dataverse's OAI_ORE Metadata Export format and archival BagIT exports +(which include the OAI-ORE metadata export file) have been updated to include +information about the dataset version state, e.g. RELEASED or DEACCESSIONED, +and to indicate which version of Dataverse was used to create the archival Bag. 
+As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 +version designation, and it is expected that any future changes to the OAI_ORE export +format will result in a version change and that tools such as DVUploader that can +recreate datasets from archival Bags will start indicating which version(s) of the +OAI_ORE format they can read. +Dataverse installations that have been using archival Bags may wish to update any +existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse +[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +to generate updated versions. + +- This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this cleanup and not support the old implementation that was not fully compliant with the spec. + - To fix #9952, we surround the license info with `<` and `>`. + - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information + - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. + +- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +See also #10060. + +### Solr Improvements +- As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. + +Please see the "Installing Solr" section of the Installation Prerequisites guide. + + +### Development +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools + +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews + +- A new version of the standard Dataverse Previewers from https://github.com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. + - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. + - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. + +## OpenID Connect Authentication Provider Improvements + +### Using MicroProfile Config For Provisioning + +With this release, it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. 
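For example, a minimal MicroProfile-Config-based provisioning sketch via JVM options might look like the following; the client, secret, and Keycloak realm values are placeholders, the option names are those listed under "New Configuration Options" below, and the escaped colons are the escaping `asadmin` requires in option values:

```shell
# Hypothetical OIDC provider provisioning via JVM options; substitute your own client and server values
./asadmin create-jvm-options "-Ddataverse.auth.oidc.enabled=true"
./asadmin create-jvm-options "-Ddataverse.auth.oidc.client-id=dataverse-client"
./asadmin create-jvm-options "-Ddataverse.auth.oidc.client-secret=changeme"
./asadmin create-jvm-options "-Ddataverse.auth.oidc.auth-server-url=https\://keycloak.example.org/realms/test"
```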
+ +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. + +### Adding PKCE Support + +Some OIDC providers require using PKCE as an additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +## Improved Testing + +With this release, we add a new type of testing to Dataverse: integration tests, which are not end-to-end tests +like our API tests. Starting with OIDC authentication support, we test regularly on CI that both OIDC login options, in the UI and via the API, are in working condition. + +The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. + +The support for setting JVM options during testing has been improved for developers. You may now add the +`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. + +As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. + +## New Configuration Options + +- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.client-id +- dataverse.auth.oidc.client-secret +- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.method +- dataverse.auth.oidc.title +- dataverse.auth.oidc.subtitle +- dataverse.auth.oidc.pkce.max-cache-size +- dataverse.auth.oidc.pkce.max-cache-age + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. 
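As a quick, hedged illustration of the extended files APIs listed in these notes (the server URL and dataset id below are placeholders; the endpoints and query parameters are the ones described above):

```shell
export SERVER_URL=https://demo.dataverse.org
export ID=24

# List files in the latest published version, filtered by content type and search text
curl "$SERVER_URL/api/datasets/$ID/versions/:latest-published/files?contentType=image/png&searchText=logo"

# Total download size for that version, counting only the original size for tabular files
curl "$SERVER_URL/api/datasets/$ID/versions/:latest-published/downloadsize?mode=Original"
```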
diff --git a/doc/release-notes/9268-8349-oidc-improvements.md b/doc/release-notes/9268-8349-oidc-improvements.md deleted file mode 100644 index ddfc13e603c..00000000000 --- a/doc/release-notes/9268-8349-oidc-improvements.md +++ /dev/null @@ -1,43 +0,0 @@ -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support - -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. - -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. - -## New Configuration Options - -- dataverse.auth.oidc.enabled -- dataverse.auth.oidc.client-id -- dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url -- dataverse.auth.oidc.pkce.enabled -- dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title -- dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age diff --git a/doc/release-notes/9412-markdown-previewer.md b/doc/release-notes/9412-markdown-previewer.md deleted file mode 100644 index 8faa2679fb0..00000000000 --- a/doc/release-notes/9412-markdown-previewer.md +++ /dev/null @@ -1 +0,0 @@ -There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews diff --git a/doc/release-notes/9428-alternative-title.md b/doc/release-notes/9428-alternative-title.md deleted file mode 100644 index 3bc74f218b5..00000000000 --- a/doc/release-notes/9428-alternative-title.md +++ /dev/null @@ -1,9 +0,0 @@ -Alternative Title is made repeatable. -- One will need to update database with updated citation block. 
-`curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: -Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` -Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` - -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] diff --git a/doc/release-notes/9589-ds-configure-tool.md b/doc/release-notes/9589-ds-configure-tool.md deleted file mode 100644 index 70ac5fcaa6a..00000000000 --- a/doc/release-notes/9589-ds-configure-tool.md +++ /dev/null @@ -1 +0,0 @@ -Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. diff --git a/doc/release-notes/9590-intellij-redeploy.md b/doc/release-notes/9590-intellij-redeploy.md deleted file mode 100644 index 07af352ece4..00000000000 --- a/doc/release-notes/9590-intellij-redeploy.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. - -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools diff --git a/doc/release-notes/9599-guestbook-at-request.md b/doc/release-notes/9599-guestbook-at-request.md deleted file mode 100644 index e9554b71fb4..00000000000 --- a/doc/release-notes/9599-guestbook-at-request.md +++ /dev/null @@ -1,2 +0,0 @@ -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. diff --git a/doc/release-notes/9635-solr-improvements.md b/doc/release-notes/9635-solr-improvements.md deleted file mode 100644 index ad55ee3afe6..00000000000 --- a/doc/release-notes/9635-solr-improvements.md +++ /dev/null @@ -1,4 +0,0 @@ -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. 
- diff --git a/doc/release-notes/9692-files-api-extension.md b/doc/release-notes/9692-files-api-extension.md deleted file mode 100644 index baa8e2f87cd..00000000000 --- a/doc/release-notes/9692-files-api-extension.md +++ /dev/null @@ -1,7 +0,0 @@ -The following API endpoints have been added: - -- /api/files/{id}/downloadCount -- /api/files/{id}/dataTables -- /access/datafile/{id}/userPermissions - -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination and ordering diff --git a/doc/release-notes/9714-files-api-extension-filters.md b/doc/release-notes/9714-files-api-extension-filters.md deleted file mode 100644 index 034230efe61..00000000000 --- a/doc/release-notes/9714-files-api-extension-filters.md +++ /dev/null @@ -1,14 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by: - -- Access status: through the `accessStatus` query parameter, which supports the following values: - - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic - - -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. - - -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md deleted file mode 100644 index 8d7f6c7a20a..00000000000 --- a/doc/release-notes/9763-versions-api-improvements.md +++ /dev/null @@ -1,8 +0,0 @@ -# Improvements in the /versions API - -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions; -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output; -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. - -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. - diff --git a/doc/release-notes/9785-files-api-extension-search-text.md b/doc/release-notes/9785-files-api-extension-search-text.md deleted file mode 100644 index fb185e1c7af..00000000000 --- a/doc/release-notes/9785-files-api-extension-search-text.md +++ /dev/null @@ -1,3 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by search text through the `searchText` query parameter. - -The search will be applied to the labels and descriptions of the dataset files. diff --git a/doc/release-notes/9834-files-api-extension-counts.md b/doc/release-notes/9834-files-api-extension-counts.md deleted file mode 100644 index 3ec15d8bd36..00000000000 --- a/doc/release-notes/9834-files-api-extension-counts.md +++ /dev/null @@ -1,6 +0,0 @@ -Implemented the following new endpoints: - -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). 
- - -- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. diff --git a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md b/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md deleted file mode 100644 index f306ae2ab80..00000000000 --- a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md +++ /dev/null @@ -1,14 +0,0 @@ -Implemented the following new endpoints: - -- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - - -- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. - - -In addition, the DataFile API payload has been extended to include the following fields: - -- tabularData: Boolean field to know if the DataFile is of tabular type - - -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) diff --git a/doc/release-notes/9852-files-api-extension-deaccession.md b/doc/release-notes/9852-files-api-extension-deaccession.md deleted file mode 100644 index 55698580e3c..00000000000 --- a/doc/release-notes/9852-files-api-extension-deaccession.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the existing endpoints: - -- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files) -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts) - -The above endpoints now accept a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files or file counts. - -Additionally, a new endpoint has been developed to support version deaccessioning through API (Given a dataset and a version). - -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession) - -Finally, the DataFile API payload has been extended to add the field "friendlyType" diff --git a/doc/release-notes/9880-info-api-zip-limit-embargo.md b/doc/release-notes/9880-info-api-zip-limit-embargo.md deleted file mode 100644 index d2afb139e72..00000000000 --- a/doc/release-notes/9880-info-api-zip-limit-embargo.md +++ /dev/null @@ -1,5 +0,0 @@ -Implemented the following new endpoints: - -- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - -- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. diff --git a/doc/release-notes/9907-files-api-counts-with-criteria.md b/doc/release-notes/9907-files-api-counts-with-criteria.md deleted file mode 100644 index 07cd23daad0..00000000000 --- a/doc/release-notes/9907-files-api-counts-with-criteria.md +++ /dev/null @@ -1,11 +0,0 @@ -Extended the getVersionFileCounts endpoint (/api/datasets/{id}/versions/{versionId}/files/counts) to support filtering by criteria. 
- -In particular, the endpoint now accepts the following optional criteria query parameters: - -- contentType -- accessStatus -- categoryName -- tabularTagName -- searchText - -This filtering criteria is the same as the one for the getVersionFiles endpoint. diff --git a/doc/release-notes/9955-Signposting-updates.md b/doc/release-notes/9955-Signposting-updates.md deleted file mode 100644 index db0e27e51c5..00000000000 --- a/doc/release-notes/9955-Signposting-updates.md +++ /dev/null @@ -1,7 +0,0 @@ -This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - -To fix #9952, we surround the license info with `<` and `>`. - -To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - -To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. diff --git a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md b/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md deleted file mode 100644 index 2ede679b361..00000000000 --- a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md +++ /dev/null @@ -1,9 +0,0 @@ -Added a new optional query parameter "mode" to the "getDownloadSize" API endpoint ("api/datasets/{identifier}/versions/{versionId}/downloadsize"). - -This parameter applies a filter criteria to the operation and supports the following values: - -- All (Default): Includes both archival and original sizes for tabular files - -- Archival: Includes only the archival size for tabular files - -- Original: Includes only the original size for tabular files diff --git a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md b/doc/release-notes/9972-files-api-filter-by-tabular-tags.md deleted file mode 100644 index 9c3fced1741..00000000000 --- a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md +++ /dev/null @@ -1,3 +0,0 @@ -- New query parameter `tabularTagName` added to the getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) to return files to which the particular tabular tag has been added. - -- New endpoint to set tabular file tags via API: /api/files/{id}/metadata/tabularTags. diff --git a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md deleted file mode 100644 index 020224b2094..00000000000 --- a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/downloadsize), including the following new features: - -- The endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned dataset versions when searching for versions to obtain the file total download size. - - -- The endpoint now supports filtering by criteria. 
In particular, it accepts the following optional criteria query parameters: - - - contentType - - accessStatus - - categoryName - - tabularTagName - - searchText From b077d98a11e6957085757c54c48030ef33b50c30 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 13:30:03 -0500 Subject: [PATCH 282/414] doc update, release note --- doc/release-notes/10162-globus-support.md | 14 ++++++++++++++ .../source/developers/big-data-support.rst | 7 +++++-- 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 doc/release-notes/10162-globus-support.md diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md new file mode 100644 index 00000000000..d64e72b70a1 --- /dev/null +++ b/doc/release-notes/10162-globus-support.md @@ -0,0 +1,14 @@ +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support). +- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. +- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model +- Adding files to a dataset and accessing files are supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) +- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) + +Backward Incompatibilities: +- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism +- The new functionality is incompatible with older versions of the dataverse-globus app, and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. + +New JVM Options: +- A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). + +Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index d38f7f27a68..fe49f9f6150 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -152,8 +152,6 @@ Note: Globus file transfer is still experimental but feedback is welcome! See :r Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) and a community-developed `dataverse-globus `_ app has been properly installed and configured.
-Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. - Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). @@ -162,11 +160,16 @@ Globus transfer uses an efficient transfer mechanism and has additional features * robust file transfer capable of restarting after network or endpoint failures * third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation +Note: Due to differences in the access control models of a Dataverse installation and Globus and the current Globus store model, Dataverse cannot enforce per-file access restrictions. +It is therefore recommended that a store be configured as public, which disables the ability to restrict and embargo files in that store, when Globus access is allowed. + Dataverse supports three options for using Globus, two involving transfer to Dataverse-managed endpoints and one allowing Dataverse to reference files on remote endpoints. Dataverse-managed endpoints must be Globus 'guest collections' hosted on either a file-system-based endpoint or an S3-based endpoint (the latter requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). +With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionality related to ingest, previews, fixity hash validation, etc. is not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionality normally associated with direct uploads to S3 is available.) + For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. be publicly readable, or potentially involve some out-of-band mechanism to request access that could be described in the dataset's Terms of Use and Access).
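To make the store-level setup described above concrete, the sketch below shows roughly how a managed Globus store might be configured. The option names after the dataverse.files prefix are taken from the store constants that appear later in this series (managed, transfer-endpoint-with-basepath, globus-token); the type, label, and base-store keys follow the pattern of Dataverse's other store types, and all ids and values here are placeholders rather than settings confirmed against the guides:

    # Sketch only - a hypothetical store with id "globusdemo"; all values are illustrative
    ./asadmin create-jvm-options "-Ddataverse.files.globusdemo.type=globus"
    ./asadmin create-jvm-options "-Ddataverse.files.globusdemo.label=GlobusDemo"
    ./asadmin create-jvm-options "-Ddataverse.files.globusdemo.managed=true"
    # Globus endpoint UUID plus the base path of the guest collection
    ./asadmin create-jvm-options "-Ddataverse.files.globusdemo.transfer-endpoint-with-basepath=ENDPOINT_UUID/BASE_PATH"
    # Base64-encoded client credentials used to obtain Globus access tokens
    ./asadmin create-jvm-options "-Ddataverse.files.globusdemo.globus-token=BASE64_CLIENT_CREDENTIALS"
    # Id of an existing store used to hold auxiliary files (thumbnails, etc.)
    ./asadmin create-jvm-options "-Ddataverse.files.globusdemo.base-store=file"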
From 547d71c342e08ebdf674d8754dc072465ad20651 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 14:31:07 -0500 Subject: [PATCH 283/414] #9464 add more detail to validation error message --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index ed46caf65a1..027e58d9263 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1072,7 +1072,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + String accumulatedexceptions = ""; + for (ValidationException va : vx.getCausingExceptions()){ + accumulatedexceptions = accumulatedexceptions + va; + accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); + } + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From fc3ae08ec9335ac857af4d9c112e892255ef1c7a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 14:44:00 -0500 Subject: [PATCH 284/414] adding documentation --- .../edu/harvard/iq/dataverse/DatasetPage.java | 21 ++ .../harvard/iq/dataverse/api/Datasets.java | 238 +++++++++++------- 2 files changed, 163 insertions(+), 96 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 704c1d42228..f871d2e5198 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6346,6 +6346,27 @@ public boolean isGlobusTransferRequested() { return globusTransferRequested; } + /** + * Analogous to the startDownload method, this method is called when the user + * tries to start a Globus transfer out (~download). The + * validateFilesForDownload call checks to see if there are some files that can + * be Globus transferred and, if so, and there are no files that can't be + * transferred, this method will launch the globus transfer app. If there is a + * mix of files or if the guestbook popup is required, the method passes back to + * the UI so those popup(s) can be shown. Once they are, this method is called + * with the popupShown param true and the app will be shown. + * + * @param transferAll - when called from the dataset Access menu, this should be + * true so that all files are included in the processing. + * When it is called from the file table, the current + * selection is used and the param should be false. + * @param popupShown - This method is called twice if the mixed files or + * guestbook popups are needed. On the first call, popupShown + * is false so that the transfer is not started and those + * popups can be shown.
On the second call, popupShown is + * true and processing will occur as long as there are some + * valid files to transfer. + */ public void startGlobusTransfer(boolean transferAll, boolean popupShown) { if (transferAll) { this.setSelectedFiles(workingVersion.getFileMetadatas()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 5961b428bcb..ae576134be3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3444,90 +3444,34 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" } - @POST - @AuthRequired - @Path("{id}/addGlobusFiles") - @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, - @PathParam("id") String datasetId, - @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo - ) throws IOException, ExecutionException, InterruptedException { - - logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); - - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - // ------------------------------------- - // (1) Get the user from the API key - // ------------------------------------- - AuthenticatedUser authUser; - try { - authUser = getRequestAuthenticatedUserOrDie(crc); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } - - // ------------------------------------- - // (2) Get the Dataset Id - // ------------------------------------- - Dataset dataset; - - try { - dataset = findDatasetOrDie(datasetId); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } - - JsonObject jsonObject = null; - try { - jsonObject = JsonUtil.getJsonObject(jsonData); - } catch (Exception ex) { - logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); - return badRequest("Error parsing json body"); - - } - - //------------------------------------ - // (2b) Make sure dataset does not have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - - - String lockInfoMessage = "Globus Upload API started "; - DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, - (authUser).getId(), lockInfoMessage); - if (lock != null) { - dataset.addLock(lock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } - - - ApiToken token = authSvc.findApiTokenByUser(authUser); - - if(uriInfo != null) { - logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); - } - - - String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); - - // Async Call - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); - - return ok("Async call to Globus Upload started "); - - } +/**************************** + * Globus Support Section: + * + * Globus transfer in (upload) and out (download) involve three basic steps: The + * app is launched and makes a callback to the + * globusUploadParameters/globusDownloadParameters method to get all of the info + * needed to set 
up its display. + * + * At some point after that, the user will make a selection as to which files to + * transfer and the app will call requestGlobusUploadPaths/requestGlobusDownload + * to indicate a transfer is about to start. In addition to providing the + * details of where to transfer the files to/from, Dataverse also grants the + * Globus principal involved the relevant rw or r permission for the dataset. + * + * Once the transfer is started, the app records the task id and sends it to + * Dataverse in the addGlobusFiles/monitorGlobusDownload call. Dataverse then + * monitors the transfer task and when it ultimately succeeds or fails it + * revokes the principal's permission and, in the transfer-in case, adds the + * files to the dataset. (The dataset is locked until the transfer completes.) + * + * (If no transfer is started within a specified timeout, permissions will + * automatically be revoked - see the GlobusServiceBean for details.) + * + * The option to reference a file at a remote endpoint (rather than transfer it) + * follows the first two steps of the process above but completes with a call to + * the normal /addFiles endpoint (as there is no transfer to monitor and the + * files can be added to the dataset immediately.) + */ /** * Retrieve the parameters and signed URLs required to perform a globus @@ -3630,11 +3574,11 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } /** - * Requests permissions for a given globus user to upload to the dataset + * Provides specific storageIdentifiers to use for each file and requests permissions for a given globus user to upload to the dataset * * @param crc * @param datasetId - * @param jsonData + * @param jsonData - an object that must include the id of the globus "principal" involved and the "numberOfFiles" that will be transferred. * @return * @throws IOException * @throws ExecutionException @@ -3721,15 +3665,114 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP } - /** - * Retrieve the parameters and signed URLs required to perform a globus - * transfer/download. This api endpoint is expected to be called as a signed - * callback after the globus-dataverse app/other app is launched, but it will - * accept other forms of authentication. + /** A method analogous to /addFiles that must also include the taskIdentifier of the transfer-in-progress to monitor * * @param crc * @param datasetId + * @param jsonData - see /addFiles documentation; an additional "taskIdentifier" key in the main object is required.
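+ *                 A hypothetical minimal body, for illustration only (the per-file keys follow the /addFiles conventions and all values here are placeholders): { "taskIdentifier": "GLOBUS_TASK_ID", "files": [ { "storageIdentifier": "IDENTIFIER_RETURNED_BY_requestGlobusUploadPaths", "fileName": "example.txt", "mimeType": "text/plain" } ] }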
+ * @param uriInfo + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException */ + @POST + @AuthRequired + @Path("{id}/addGlobusFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData, + @Context UriInfo uriInfo + ) throws IOException, ExecutionException, InterruptedException { + + logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } + + //------------------------------------ + // (2b) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + + String lockInfoMessage = "Globus Upload API started "; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, + (authUser).getId(), lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + + ApiToken token = authSvc.findApiTokenByUser(authUser); + + if(uriInfo != null) { + logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); + } + + + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + + // Async Call + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + + return ok("Async call to Globus Upload started "); + + } + +/** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer/download. This api endpoint is expected to be called as a signed + * callback after the globus-dataverse app/other app is launched, but it will + * accept other forms of authentication. + * + * @param crc + * @param datasetId + * @param locale + * @param downloadId - an id to a cached object listing the files involved. This is generated via Dataverse and provided to the dataverse-globus app in a signedURL. + * @return - JSON containing the parameters and URLs needed by the dataverse-globus app. The format is analogous to that for external tools. 
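+ *         A rough sketch of the shape, for illustration only - the top-level keys are assumed from the external-tool callback format and all values are invented: { "queryParameters": { "datasetPid": "doi:10.5072/FK2/EXAMPLE", "managed": "true", "endpoint": "ENDPOINT_ID/PATH", "files": "..." }, "signedUrls": [ { "name": "monitorGlobusDownload", "httpMethod": "POST", "signedUrl": "..." } ] }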
+ */ @GET @AuthRequired @Path("{id}/globusDownloadParameters") @@ -3815,12 +3858,14 @@ public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @P /** * Requests permissions for a given globus user to download the specified files - * the dataset + * in the dataset and returns information about the paths to transfer from. + * + * When called directly rather than in response to being given a downloadId, the jsonData can include a "fileIds" key with an array of file ids to transfer. * * @param crc * @param datasetId - * @param jsonData - * @return + * @param jsonData - a JSON object that must include the id of the Globus "principal" that will be transferring the files in the case where Dataverse manages the Globus endpoint. For remote endpoints, the principal is not required. + * @return - a JSON object containing a map of file ids to Globus endpoint/path * @throws IOException * @throws ExecutionException * @throws InterruptedException @@ -3957,11 +4002,12 @@ public Response requestGlobusDownload(@Context ContainerRequestContext crc, @Pat /** * Monitors a globus download and removes permissions on the dir/dataset when - * done + * the specified transfer task is completed. * * @param crc * @param datasetId - * @param jsonData + * @param jsonData - a JSON Object containing the key "taskIdentifier" with the + * Globus task to monitor. * @return * @throws IOException * @throws ExecutionException From 7697157ac98049dea45a2bd98193aad75e6037e1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 15:27:21 -0500 Subject: [PATCH 285/414] #9464 handle single errors --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 027e58d9263..07e7fe615e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1077,7 +1077,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { accumulatedexceptions = accumulatedexceptions + va; accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); } - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + if (!accumulatedexceptions.isEmpty()){ + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + } else { + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + } + } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From 8ec61d084a81c7d5786bd583177b80255aa7e883 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 15:58:21 -0500 Subject: [PATCH 286/414] cleanup, add method stubs, open for basestore, info->fine --- .../AbstractRemoteOverlayAccessIO.java | 12 +- .../dataaccess/GlobusAccessibleStore.java | 6 + .../dataaccess/GlobusOverlayAccessIO.java | 142 ++++++++++++------ 3 files changed, 112 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 16defc26a4f..8d058b7c9e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -15,11 +15,8 @@ import javax.net.ssl.SSLContext; -import org.apache.http.Header; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpHead; import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; @@ -30,15 +27,18 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; -import org.apache.http.protocol.HTTP; import org.apache.http.ssl.SSLContextBuilder; -import org.apache.http.util.EntityUtils; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; + +/** + * A base class for StorageIO implementations supporting remote access. At present, that includes the RemoteOverlayAccessIO store and the newer GlobusOverlayAccessIO store. It primarily includes + * common methods for handling auxiliary files in the configured base store. + * @param + */ public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index d827e40e807..e4d062f0619 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -6,7 +6,13 @@ public interface GlobusAccessibleStore { + //Whether Dataverse manages access controls for the Globus endpoint or not. 
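+    // (Illustrative note: following the per-store option pattern used for other store settings, this is expected to surface as a JVM option along the lines of dataverse.files.STOREID.managed=true - the exact property name is an assumption, not taken from this patch.)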
static final String MANAGED = "managed"; + /* + * transfer and reference endpoint formats: + * + * REFERENCE_ENDPOINTS_WITH_BASEPATHS - reference endpoints separated by a comma + */ static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; static final String GLOBUS_TOKEN = "globus-token"; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index e825af8cf30..7a6809cb2ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -2,12 +2,15 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -16,6 +19,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; +import java.util.List; import java.util.logging.Logger; import org.apache.http.client.ClientProtocolException; @@ -32,9 +36,18 @@ /** * @author qqmyers - */ + * + * This class implements three related use cases, all of which leverage the underlying idea of using a base store (as with the Https RemoteOverlay store): + * Managed - where Dataverse has control of the specified Globus endpoint and can set/remove permissions as needed to allow file transfers in/out: + * File/generic endpoint - assumes Dataverse does not have access to the datafile contents + * S3-Connector endpoint - assumes the datafiles are accessible via Globus and via S3 such that Dataverse can access the datafile contents when needed. + * Remote - where Dataverse references files that remain at remote Globus endpoints (as with the Https RemoteOverlay store) and cannot access the datafile contents. + * + * Note that Globus endpoints can provide Http URLs to get file contents, so a future enhancement could potentially support datafile contents access in the Managed/File and Remote cases. + * + * */ /* - * Globus Overlay Driver + * Globus Overlay Driver storageIdentifier format: * * Remote: StorageIdentifier format: * ://// @@ -47,11 +60,6 @@ * * Storage location: * /// - * - * transfer and reference endpoint formats: - * - * - * reference endpoints separated by a comma * */ public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { @@ -115,7 +123,6 @@ private String retrieveGlobusAccessToken() { return accessToken.getOtherTokens().get(0).getAccessToken(); } - private void parsePath() { int filenameStart = path.lastIndexOf("/") + 1; String endpointWithBasePath = null; @@ -126,9 +133,9 @@ private void parsePath() { } //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); int pathStart = endpointWithBasePath.indexOf("/"); - logger.info("endpointWithBasePath: " + endpointWithBasePath); + logger.fine("endpointWithBasePath: " + endpointWithBasePath); endpointPath = "/" + (pathStart > 0 ?
endpointWithBasePath.substring(pathStart + 1) : ""); - logger.info("endpointPath: " + endpointPath); + logger.fine("endpointPath: " + endpointPath); if (isManaged() && (dvObject!=null)) { @@ -146,7 +153,7 @@ private void parsePath() { if (filenameStart > 0) { relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); } - logger.info("relativeDirectoryPath finally: " + relativeDirectoryPath); + logger.fine("relativeDirectoryPath finally: " + relativeDirectoryPath); filename = path.substring(filenameStart); endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; @@ -171,7 +178,7 @@ protected void validatePath(String relPath) throws IOException { } else { try { String endpoint = findMatchingEndpoint(relPath, allowedEndpoints); - logger.info(endpoint + " " + relPath); + logger.fine(endpoint + " " + relPath); if (endpoint == null || !Paths.get(endpoint, relPath).normalize().startsWith(endpoint)) { throw new IOException( @@ -189,7 +196,6 @@ protected void validatePath(String relPath) throws IOException { public long retrieveSizeFromMedia() { parsePath(); String globusAccessToken = retrieveGlobusAccessToken(); - logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { @@ -198,13 +204,12 @@ public long retrieveSizeFromMedia() { + "/ls?path=" + endpointPath + relativeDirectoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); JsonArray dataArray = responseJson.getJsonArray("DATA"); if (dataArray != null && dataArray.size() != 0) { @@ -214,7 +219,7 @@ public long retrieveSizeFromMedia() { } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(response.getEntity())); + logger.fine(EntityUtils.toString(response.getEntity())); } } catch (URISyntaxException e) { // Should have been caught in validatePath @@ -258,16 +263,15 @@ public void delete() throws IOException { absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/submission_id"); HttpGet get = new HttpGet(absoluteURI); - logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); String submissionId = responseJson.getString("value"); - logger.info("submission_id for delete is: " + submissionId); + logger.fine("submission_id for delete is: " + submissionId); absoluteURI = new 
URI("https://transfer.api.globusonline.org/v0.10/delete"); HttpPost post = new HttpPost(absoluteURI); JsonObjectBuilder taskJsonBuilder = Json.createObjectBuilder(); @@ -277,30 +281,30 @@ public void delete() throws IOException { post.setHeader("Content-Type", "application/json"); post.addHeader("Authorization", "Bearer " + globusAccessToken); String taskJson= JsonUtil.prettyPrint(taskJsonBuilder.build()); - logger.info("Sending: " + taskJson); + logger.fine("Sending: " + taskJson); post.setEntity(new StringEntity(taskJson, "utf-8")); CloseableHttpResponse postResponse = getSharedHttpClient().execute(post, localContext); int statusCode=postResponse.getStatusLine().getStatusCode(); - logger.info("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); + logger.fine("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); switch (statusCode) { case 202: // ~Success - delete task was accepted - logger.info("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); + logger.fine("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); break; case 200: // Duplicate - delete task was already accepted - logger.info("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); + logger.warning("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); break; default: logger.warning("Response from " + post.getURI().toString() + " was " + postResponse.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(postResponse.getEntity())); + logger.fine(EntityUtils.toString(postResponse.getEntity())); } } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(response.getEntity())); + logger.fine(EntityUtils.toString(response.getEntity())); } } catch (Exception e) { logger.warning(e.getMessage()); @@ -383,7 +387,7 @@ public String getStorageLocation() throws IOException { */ protected void configureGlobusEndpoints() throws IOException { allowedEndpoints = getAllowedEndpoints(this.driverId); - logger.info("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); + logger.fine("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); } private static String[] getAllowedEndpoints(String driverId) throws IOException { @@ -409,37 +413,91 @@ private static String[] getAllowedEndpoints(String driverId) throws IOException @Override - public void open(DataAccessOption... option) throws IOException { - // TODO Auto-generated method stub - - } + public void open(DataAccessOption... 
options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(retrieveSizeFromMedia()); + } + // Only applies for the S3 Connector case (where we could have run an ingest) + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataset yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } + } @Override public Path getFileSystemPath() throws IOException { - // TODO Auto-generated method stub - return null; + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": getFileSystemPath() not implemented in this storage driver."); } - @Override public void savePath(Path fileSystemPath) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - @Override public void saveInputStream(InputStream inputStream) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": saveInputStream() not implemented in this storage driver."); } - @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": saveInputStream() not implemented in this storage driver."); } - + } From 38c120e13d2e1276324b903be58306520168b577 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 18:21:41 -0500 Subject: [PATCH 287/414] cleanup, delete obsolete methods, change to private, info->fine --- .../harvard/iq/dataverse/api/Datasets.java | 4 +- .../dataverse/globus/GlobusServiceBean.java | 461 +++--------------- .../iq/dataverse/settings/JvmSettings.java | 2 +- src/main/webapp/globus.xhtml | 30 -- 4 files changed, 78 insertions(+), 419 deletions(-) delete mode 100644 src/main/webapp/globus.xhtml diff --git
a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ae576134be3..cb57acd3b86 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3543,7 +3543,7 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } else { params.add("referenceEndpointsWithPaths", referenceEndpointsWithPaths); } - int timeoutSeconds = JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); String requestCallName = managed ? "requestGlobusTransferPaths" : "requestGlobusReferencePaths"; allowedApiCalls.add( @@ -3833,7 +3833,7 @@ public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @P params.add("endpoint", transferEndpoint); } params.add("files", files); - int timeoutSeconds = JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "monitorGlobusDownload") .add(URLTokenUtil.HTTP_METHOD, "POST") diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 0c991424ce9..37959188857 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -97,34 +97,6 @@ public class GlobusServiceBean implements java.io.Serializable { private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - private String code; - private String userTransferToken; - private String state; - - public String getState() { - return state; - } - - public void setState(String state) { - this.state = state; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getUserTransferToken() { - return userTransferToken; - } - - public void setUserTransferToken(String userTransferToken) { - this.userTransferToken = userTransferToken; - } - private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) throws MalformedURLException { @@ -152,33 +124,6 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi return null; } - /* - * public void updatePermision(AccessToken clientTokenUser, String directory, - * String principalType, String perm) throws MalformedURLException { if - * (directory != null && !directory.equals("")) { directory = directory + "/"; } - * logger.info("Start updating permissions." 
+ " Directory is " + directory); - * String globusEndpoint = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - * ArrayList rules = checkPermisions(clientTokenUser, directory, - * globusEndpoint, principalType, null); logger.info("Size of rules " + - * rules.size()); int count = 0; while (count < rules.size()) { - * logger.info("Start removing rules " + rules.get(count)); Permissions - * permissions = new Permissions(); permissions.setDATA_TYPE("access"); - * permissions.setPermissions(perm); permissions.setPath(directory); - * - * Gson gson = new GsonBuilder().create(); URL url = new - * URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint - * + "/access/" + rules.get(count)); - * logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + - * globusEndpoint + "/access/" + rules.get(count)); MakeRequestResponse result = - * makeRequest(url, "Bearer", - * clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", - * gson.toJson(permissions)); if (result.status != 200) { - * logger.warning("Cannot update access rule " + rules.get(count)); } else { - * logger.info("Access rule " + rules.get(count) + " was updated"); } count++; } - * } - */ - /** * Call to delete a globus rule related to the specified dataset. * @@ -214,6 +159,13 @@ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger } } + /** Request read/write access for the specified principal and generate a list of accessible paths for new files for the specified dataset. + * + * @param principal - the id of the Globus principal doing the transfer + * @param dataset + * @param numberOfPaths - how many files are to be transferred + * @return + */ public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { GlobusEndpoint endpoint = getGlobusEndpoint(dataset); @@ -278,6 +230,12 @@ private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissi } } + /** Given an array of remote files to be referenced in the dataset, create a set of valid storage identifiers and return a map of the remote file paths to storage identifiers. + * + * @param dataset + * @param referencedFiles - a JSON array of remote files to be referenced in the dataset - each should be a string with the /path/to/file + * @return - a map of supplied paths to valid storage identifiers + */ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { String driverId = dataset.getEffectiveStorageDriverId(); JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); @@ -304,39 +262,38 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref return fileMap.build(); } + + /** A cache of temporary permission requests - for upload (rw) and download (r) access. + * When a temporary permission request is created, it is added to the cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, the permission will be revoked/deleted. + * (If a transfer has been started, the permission will not be revoked/deleted until the transfer is complete. This is handled in other methods.) 
+ */ // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { // Delete rules that expire - logger.info("Rule " + ruleId + " expired"); + logger.fine("Rule " + ruleId + " expired"); Dataset dataset = datasetSvc.find(datasetId); deletePermission((String) ruleId, dataset, logger); }) .build(); + //Convenience method to add a temporary permission request to the cache - allows logging of temporary permission requests private void monitorTemporaryPermissions(String ruleId, long datasetId) { - logger.info("Adding rule " + ruleId + " for dataset " + datasetId); + logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); rulesCache.put(ruleId, datasetId); } - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId - + "/successful_transfers"); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - - if (result.status == 200) { - logger.info(" SUCCESS ====== "); - return true; - } - return false; - } - +/** Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log files are created in general, some calls may use the class logger) + * @return + * @throws MalformedURLException + */ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); @@ -356,6 +313,11 @@ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger return task; } + /** Globus call to get an access token for the user using the long-term token we hold. 
+ * + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of the : + * @return - a valid Globus access token + */ public static AccessToken getClientToken(String globusBasicToken) { URL url; AccessToken clientTokenUser = null; @@ -375,36 +337,7 @@ public static AccessToken getClientToken(String globusBasicToken) { return clientTokenUser; } - public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken) - throws UnsupportedEncodingException, MalformedURLException { - String serverName = origRequest.getServerName(); - if (serverName.equals("localhost")) { - logger.severe("Changing localhost to utoronto"); - serverName = "utl-192-123.library.utoronto.ca"; - } - - String redirectURL = "https://" + serverName + "/globus.xhtml"; - - redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); - - URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL - + "&grant_type=authorization_code"); - logger.info(url.toString()); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); - AccessToken accessTokenUser = null; - - if (result.status == 200) { - logger.info("Access Token: \n" + result.toString()); - accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); - logger.info(accessTokenUser.getAccessToken()); - } - - return accessTokenUser; - - } - - public static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + private static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -412,9 +345,8 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a try { connection = (HttpURLConnection) url.openConnection(); // Basic - // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 logger.info(authType + " " + authCode); - logger.info("For URL: " + url.toString()); + logger.fine("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -422,7 +354,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a if (jsonString != null) { connection.setRequestProperty("Content-Type", "application/json"); connection.setRequestProperty("Accept", "application/json"); - logger.info(jsonString); + logger.fine(jsonString); connection.setDoOutput(true); OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); @@ -431,24 +363,21 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a } status = connection.getResponseCode(); - logger.info("Status now " + status); + logger.fine("Status now " + status); InputStream result = connection.getInputStream(); if (result != null) { - logger.info("Result is not null"); str = readResultJson(result).toString(); - logger.info("str is "); - logger.info(result.toString()); + logger.fine("str is " + result.toString()); } else { - logger.info("Result is null"); + logger.fine("Result is null"); str = null; } - logger.info("status: " + status); + logger.fine("status: " + status); } catch (IOException ex) { - logger.info("IO"); logger.severe(ex.getMessage()); - logger.info(ex.getCause().toString()); - logger.info(ex.getStackTrace().toString()); + logger.fine(ex.getCause().toString()); + 
logger.fine(ex.getStackTrace().toString()); } finally { if (connection != null) { connection.disconnect(); @@ -461,16 +390,14 @@ private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; - try { - - BufferedReader br = new BufferedReader(new InputStreamReader(in)); + try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) { sb = new StringBuilder(); String line; while ((line = br.readLine()) != null) { sb.append(line + "\n"); } br.close(); - logger.info(sb.toString()); + logger.fine(sb.toString()); } catch (IOException e) { sb = null; logger.severe(e.getMessage()); @@ -495,31 +422,6 @@ private static T parseJson(String sb, Class jsonParserClass, boolean nami } } - public String getDirectory(String datasetId) { - Dataset dataset = null; - String directory = null; - try { - dataset = datasetSvc.find(Long.parseLong(datasetId)); - if (dataset == null) { - logger.severe("Dataset not found " + datasetId); - return null; - } - String storeId = dataset.getStorageIdentifier(); - storeId.substring(storeId.indexOf("//") + 1); - directory = storeId.substring(storeId.indexOf("//") + 1); - logger.info(storeId); - logger.info(directory); - logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); - return directory; - - } catch (NumberFormatException nfe) { - logger.severe(nfe.getMessage()); - - return null; - } - - } - static class MakeRequestResponse { public String jsonResponse; public int status; @@ -531,53 +433,26 @@ static class MakeRequestResponse { } - /* - * unused - may be needed for S3 case private MakeRequestResponse - * findDirectory(String directory, String clientToken, String globusEndpoint) - * throws MalformedURLException { URL url = new - * URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint - * + "/ls?path=" + directory + "/"); - * - * MakeRequestResponse result = makeRequest(url, "Bearer", clientToken, "GET", - * null); logger.info("find directory status:" + result.status); - * - * return result; } - */ - /* - * public boolean giveGlobusPublicPermissions(Dataset dataset) throws - * UnsupportedEncodingException, MalformedURLException { - * - * GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - * - * - * MakeRequestResponse status = findDirectory(endpoint.getBasePath(), - * endpoint.getClientToken(), endpoint.getId()); - * - * if (status.status == 200) { - * - * int perStatus = givePermission("all_authenticated_users", "", "r", dataset); - * logger.info("givePermission status " + perStatus); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached"); } else if - * (perStatus == 400) { logger.info("No directory in Globus"); } else if - * (perStatus != 201 && perStatus != 200) { - * logger.info("Cannot give read permission"); return false; } + /** + * Cache of open download requests. This cache keeps track of the set of files + * selected for transfer out (download) via Globus. It is a means of + * transferring the list from the DatasetPage, where it is generated via user UI + * actions, to the Datasets/globusDownloadParameters API. * - * } else if (status.status == 404) { - * logger.info("There is no globus directory"); } else { - * logger.severe("Cannot find directory in globus, status " + status); return - * false; } + * Nominally, the dataverse-globus app will call that API endpoint and then + * /requestGlobusDownload, at which point the cached info is sent to the app.
If + * the app doesn't call within 5 minutes (the time allowed to call + * /globusDownloadParameters) + GLOBUS_CACHE_MAXAGE minutes (a ~longer period + * giving the user time to make choices in the app), the cached info is deleted. * - * return true; } */ - - // Single cache of open rules/permission requests private final Cache downloadCache = Caffeine.newBuilder() .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((downloadId, datasetId, cause) -> { // Delete downloads that expire - logger.info("Download for " + downloadId + " expired"); + logger.fine("Download for " + downloadId + " expired"); }) .build(); @@ -600,11 +475,18 @@ public int setPermissionForDownload(Dataset dataset, String principal) { return requestPermission(endpoint, dataset, permissions); } - // Generates the URL to launch the Globus app + // Generates the URL to launch the Globus app for upload public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } + /** Generated the App URl for upload (in) or download (out) + * + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download + * @param dataFiles - a list of the DataFiles to be downloaded + * @return + */ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; @@ -654,7 +536,7 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles, Dataset d) { return filesBuilder.build(); } - public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { + private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); } @@ -718,7 +600,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S GlobusEndpoint endpoint = getGlobusEndpoint(dataset); GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); - logger.info("Found rule: " + ruleId); + logger.fine("Found rule: " + ruleId); if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); if (datasetId != null) { @@ -812,8 +694,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - logger.info("Size: " + newfilesJsonArray.size()); - logger.info("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -1227,198 +1109,8 @@ public String calculatemime(String fileName) throws InterruptedException { return finalType; } - /* - * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) - * throws MalformedURLException { - * - * logger.info("=====Tasklist == dataset id :" + 
dataset.getId()); String - * directory = null; - * - * try { - * - * List fileMetadatas = new ArrayList<>(); - * - * StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - * - * - * - * DatasetVersion workingVersion = dataset.getEditVersion(); - * - * if (workingVersion.getCreateTime() != null) { - * workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } - * - * directory = dataset.getAuthorityForFileStorage() + "/" + - * dataset.getIdentifierForFileStorage(); - * - * System.out.println("======= directory ==== " + directory + - * " ==== datasetId :" + dataset.getId()); Map checksumMapOld - * = new HashMap<>(); - * - * Iterator fmIt = workingVersion.getFileMetadatas().iterator(); - * - * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile() - * != null && fm.getDataFile().getId() != null) { String chksum = - * fm.getDataFile().getChecksumValue(); if (chksum != null) { - * checksumMapOld.put(chksum, 1); } } } - * - * List dFileList = new ArrayList<>(); boolean update = false; for - * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - * - * String s3ObjectKey = s3ObjectSummary.getKey(); - * - * - * String t = s3ObjectKey.replace(directory, ""); - * - * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String - * filePath = s3ObjectKey; String fileName = - * filePath.split("/")[filePath.split("/").length - 1]; String fullPath = - * datasetSIO.getStorageLocation() + "/" + fileName; - * - * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = - * DataAccess.getDirectStorageIO(fullPath); InputStream in = - * dataFileStorageIO.getInputStream(); - * - * String checksumVal = FileUtil.calculateChecksum(in, - * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); - * logger.info("The checksum is " + checksumVal); if - * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + - * dataset.getId() + "======= filename ==== " + filePath + - * " == file already exists "); } else if (filePath.contains("cached") || - * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else - * { update = true; logger.info("datasetId :" + dataset.getId() + - * "======= filename ==== " + filePath + " == new file "); try { - * - * DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); - * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new - * Date().getTime())); datafile.setCreateDate(new Timestamp(new - * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new - * Date().getTime())); - * - * FileMetadata fmd = new FileMetadata(); - * - * - * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, - * "").replace(File.separator + fileName, "")); - * - * fmd.setDataFile(datafile); - * - * datafile.getFileMetadatas().add(fmd); - * - * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); - * logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " - * + filePath + " == added to datafile, filemetadata "); - * - * try { // We persist "SHA1" rather than "SHA-1". 
- * //datafile.setChecksumType(DataFile.ChecksumType.SHA1); - * datafile.setChecksumType(DataFile.ChecksumType.MD5); - * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { - * logger.info("==== datasetId :" + dataset.getId() + - * "======Could not calculate checksumType signature for the new file "); } - * - * datafile.setFilesize(totalSize); - * - * dFileList.add(datafile); - * - * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() + - * "======Failed to process and/or save the file " + ioex.getMessage()); return - * false; - * - * } } } } if (update) { - * - * List filesAdded = new ArrayList<>(); - * - * if (dFileList != null && dFileList.size() > 0) { - * - * // Dataset dataset = version.getDataset(); - * - * for (DataFile dataFile : dFileList) { - * - * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); - * - * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); - * dataFile.getFileMetadata().setDatasetVersion(workingVersion); - * dataset.getFiles().add(dataFile); - * - * } - * - * filesAdded.add(dataFile); - * - * } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ===== Done! Finished saving new files to the dataset."); } - * - * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) { - * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null; - * - * if (workingVersion.isDraft()) { - * - * logger.info("Async: ==== datasetId :" + dataset.getId() + - * " ==== inside draft version "); - * - * Timestamp updateTime = new Timestamp(new Date().getTime()); - * - * workingVersion.setLastUpdateTime(updateTime); - * dataset.setModificationTime(updateTime); - * - * - * for (FileMetadata fileMetadata : fileMetadatas) { - * - * if (fileMetadata.getDataFile().getCreateDate() == null) { - * fileMetadata.getDataFile().setCreateDate(updateTime); - * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); } - * fileMetadata.getDataFile().setModificationTime(updateTime); } - * - * - * } else { logger.info("datasetId :" + dataset.getId() + - * " ==== inside released version "); - * - * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for - * (FileMetadata fileMetadata : fileMetadatas) { if - * (fileMetadata.getDataFile().getStorageIdentifier() != null) { - * - * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion. 
- * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { - * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } } - * - * - * } - * - * - * try { Command cmd; logger.info("Async: ==== datasetId :" + - * dataset.getId() + - * " ======= UpdateDatasetVersionCommand START in globus function "); cmd = new - * UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, - * (HttpServletRequest) null)); ((UpdateDatasetVersionCommand) - * cmd).setValidateLenient(true); //new DataverseRequest(authenticatedUser, - * (HttpServletRequest) null) //dvRequestService.getDataverseRequest() - * commandEngine.submit(cmd); } catch (CommandException ex) { - * logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + - * "======CommandException updating DatasetVersion from batch job: " + - * ex.getMessage()); return false; } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); - * - * //return true; } - * - * } catch (Exception e) { String message = e.getMessage(); - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL Exception ============== " + message); - * e.printStackTrace(); return false; //return - * error(Response.Status.INTERNAL_SERVER_ERROR, - * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. Message was '" - * + message + "'."); } - * - * String globusBasicToken = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - * AccessToken clientTokenUser = getClientToken(globusBasicToken); - * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } - * - */ - GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { Dataset dataset = null; if (dvObject instanceof Dataset) { dataset = (Dataset) dvObject; @@ -1435,8 +1127,6 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { if (GlobusAccessibleStore.isDataverseManaged(driverId) && (dataset != null)) { directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - logger.info("directoryPath now: " + directoryPath); - } else { // remote store - may have path in file storageidentifier String relPath = dvObject.getStorageIdentifier() @@ -1446,17 +1136,16 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { directoryPath = directoryPath + relPath.substring(0, filenameStart); } } - logger.info("directoryPath finally: " + directoryPath); + logger.fine("directoryPath finally: " + directoryPath); String endpointId = GlobusAccessibleStore.getTransferEndpointId(driverId); - logger.info("endpointId: " + endpointId); + logger.fine("endpointId: " + endpointId); String globusToken = GlobusAccessibleStore.getGlobusToken(driverId); AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); - logger.info("clientToken: " + clientToken); endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); return endpoint; @@ -1484,7 +1173,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, DataFile df = guestbookResponse.getDataFile(); if (df != null) { - logger.info("Single datafile case for writeGuestbookAndStartTransfer"); + logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); List downloadDFList = new ArrayList(1); downloadDFList.add(df); if 
(!doNotSaveGuestbookResponse) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index c9038047611..96a56d09c0b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,7 +50,7 @@ public enum JvmSettings { UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), - GLOBUS_RULES_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), FILES(SCOPE_FILES), BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), diff --git a/src/main/webapp/globus.xhtml b/src/main/webapp/globus.xhtml deleted file mode 100644 index f4eebd4babf..00000000000 --- a/src/main/webapp/globus.xhtml +++ /dev/null @@ -1,30 +0,0 @@ [30 lines of XHTML markup elided] From caa6e684390bb4c36dff45f1de94837f8b632f57 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 18:29:22 -0500 Subject: [PATCH 288/414] revert unrelated changes, old settings --- .../harvest/server/web/servlet/OAIServlet.java | 15 ++++++++++----- .../iq/dataverse/settings/JvmSettings.java | 5 +---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 19901cae796..96a19acc0e8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -73,13 +73,18 @@ public class OAIServlet extends HttpServlet { @EJB SystemConfig systemConfig; + + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxidentifiers", defaultValue="100") + private Integer maxListIdentifiers; - //Todo - revert this change - added to get past some local compile issues - private Integer maxListIdentifiers=100; - - private Integer maxListSets=100; + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxsets", defaultValue="100") + private Integer maxListSets; - private Integer maxListRecords=10; + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxrecords", defaultValue="10") + private Integer maxListRecords; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.web.servlet.OAIServlet"); // If we are going to stick with this solution - of providing a minimalist diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 96a56d09c0b..fb85ae9adab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,10 +50,7 @@ public enum JvmSettings { UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), - GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), - FILES(SCOPE_FILES), - BASE_URL(FILES, "base-url"), - GLOBUS_TOKEN(FILES, "globus-token"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 3babc5aac25710dcc92a90ae861a7b21eef43742 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 4 Dec 2023 20:35:56 -0500 Subject: [PATCH 289/414]
moving the StorageUse member to DvObjectContainer from DvObject; moving the em.merge()/em.persist() to the djb. #8549 --- .../java/edu/harvard/iq/dataverse/DataFile.java | 17 ----------------- .../iq/dataverse/DataverseServiceBean.java | 17 ++++++++++++++++- .../java/edu/harvard/iq/dataverse/DvObject.java | 14 -------------- .../harvard/iq/dataverse/DvObjectContainer.java | 14 ++++++++++++-- .../command/impl/SetCollectionQuotaCommand.java | 15 +-------------- .../storageuse/StorageUseServiceBean.java | 1 - 6 files changed, 29 insertions(+), 49 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 2770118d41b..3d8086b142b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -641,23 +641,6 @@ public String getFriendlySize() { } } - /** - * Experimental - record the pre-calculated "storage size" of the file, and - * all its associated auxiliary file objects: - - @Column(nullable = true) - private Long storageSize; - - - public Long getStorageSize() { - return storageSize; - } - - public void setStorageSize(Long storageSize) { - this.storageSize = storageSize; - } - * */ - public boolean isRestricted() { return restricted; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 549b8310122..487215c7a65 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -18,6 +18,7 @@ import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; @@ -919,5 +920,19 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } - + public void saveStorageQuota(Dataverse target, Long allocation) { + StorageQuota storageQuota = target.getStorageQuota(); + + if (storageQuota != null) { + storageQuota.setAllocation(allocation); + em.merge(storageQuota); + } else { + storageQuota = new StorageQuota(); + storageQuota.setDefinitionPoint(target); + storageQuota.setAllocation(allocation); + target.setStorageQuota(storageQuota); + em.persist(storageQuota); + } + em.flush(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 515d9f9f153..df249e04663 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.storageuse.StorageQuota; -import edu.harvard.iq.dataverse.storageuse.StorageUse; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -182,10 +181,6 @@ public void setAlternativePersistentIndentifiers(Set roleAssignments; - /** - * Should only be used in constructors for DvObjectContainers (Datasets and - * Collections), to make sure new entries are created and persisted in the - * database StorageUse table for every DvObject container we create. 
- * @param storageUse - */ - public void setStorageUse(StorageUse storageUse) { - this.storageUse = storageUse; - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index 2f391e394fa..82057315fbb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -2,11 +2,9 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.persistence.CascadeType; -import java.util.Locale; import java.util.Optional; import jakarta.persistence.MappedSuperclass; @@ -45,6 +43,9 @@ public boolean isEffectivelyPermissionRoot() { private Boolean guestbookAtRequest = null; + @OneToOne(mappedBy = "dvObjectContainer",cascade={ CascadeType.REMOVE, CascadeType.PERSIST}, orphanRemoval=true) + private StorageUse storageUse; + public String getEffectiveStorageDriverId() { String id = storageDriver; if (StringUtils.isBlank(id)) { @@ -165,4 +166,13 @@ public void setCurationLabelSetName(String setName) { this.externalLabelSetName = setName; } + /** + * Should only be used in constructors for DvObjectContainers (Datasets and + * Collections), to make sure new entries are created and persisted in the + * database StorageUse table for every DvObject container we create. + * @param storageUse + */ + public void setStorageUse(StorageUse storageUse) { + this.storageUse = storageUse; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index cf8fb6fd42e..e52c47a5e7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; @@ -49,18 +48,6 @@ public void executeImpl(CommandContext ctxt) throws CommandException { throw new IllegalCommandException("Must specify valid allocation in bytes", this); } - StorageQuota storageQuota = dataverse.getStorageQuota(); - - if (storageQuota != null) { - storageQuota.setAllocation(allocation); - ctxt.em().merge(storageQuota); - } else { - storageQuota = new StorageQuota(); - storageQuota.setDefinitionPoint(dataverse); - storageQuota.setAllocation(allocation); - dataverse.setStorageQuota(storageQuota); - ctxt.em().persist(storageQuota); - } - ctxt.em().flush(); + ctxt.dataverses().saveStorageQuota(dataverse, allocation); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index 18e4ef49640..fbaaff22dee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,6 +1,5 @@ package 
edu.harvard.iq.dataverse.storageuse; -import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.settings.JvmSettings; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; From dfa2dc3853254bc8c58bedbfd288a63bcfa07b32 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 04:38:46 -0500 Subject: [PATCH 290/414] remove adaptation for quotas PR that was itself changed --- .../impl/CreateNewDataFilesCommand.java | 24 ++----------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 269ba47643b..0470f59b861 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -3,20 +3,18 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -85,7 +83,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand sio; - try { - sio = DataAccess.getDirectStorageIO(DataAccess.getLocationFromStorageId(newStorageIdentifier, version.getDataset())); - - // get file size - // Note - some stores (e.g. AWS S3) only offer eventual consistency and a call - // to get the size immediately after uploading may fail. As of the addition of - // PR#9409 adding storage quotas, we are now requiring size to be available - // earlier. If this is seen, adding - // a delay/retry may help - newFileSize = sio.retrieveSizeFromMedia(); - } catch (IOException e) { - // If we don't get a file size, a CommandExecutionException will be thrown later in the code - e.printStackTrace(); - } - } } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just From c78613e60ca7a2442753d6382b0ace3c7fd07316 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 08:42:23 -0500 Subject: [PATCH 291/414] one more refinement for the flyway script. 
#8549 --- .../storageuse/StorageUseServiceBean.java | 33 ++++++++++--------- .../V6.0.0.5__8549-collection-quotas.sql | 13 ++++++++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index fbaaff22dee..7aea7a7b596 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -46,23 +46,24 @@ public Long findStorageSizeByDvContainerId(Long dvObjectId) { */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { - //@todo should throw exceptions if either parameter is null - Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); - if (!(allow.isPresent() && allow.get())) { - String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" - + "(" - + " SELECT id, owner_id\n" - + " FROM dvobject\n" - + " WHERE id=" + dvObjectContainerId + "\n" - + " UNION ALL\n" - + " SELECT dvobject.id, dvobject.owner_id\n" - + " FROM dvobject\n" - + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" - + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" - + "FROM uptree\n" - + "WHERE dvobjectcontainer_id = uptree.id;"; + if (dvObjectContainerId != null && increment != null) { + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; - int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } } // @todo throw an exception if the number of parent dvobjects updated by // the query is < 2 - ? diff --git a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql index 3657642c267..d6c067056ec 100644 --- a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql +++ b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql @@ -38,6 +38,19 @@ AND fileobject.id = file.id AND dt.datafile_id = file.id GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; +-- there may also be some auxiliary files registered in the database, such as +-- the content generated and deposited by external tools - diff. privacy stats +-- being one of the example. These are also considered the "payload" files that +-- we want to count for the purposes of calculating storage use. 
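-- (An illustrative aside, not part of this migration: once the script has run, the
-- recorded total for a single dataset can be spot-checked with a query along the lines of
--   SELECT sizeinbytes FROM storageuse WHERE dvobjectcontainer_id = <dataset dvobject id>;
-- where <dataset dvobject id> is a placeholder to fill in. The storageuse table and its
-- sizeinbytes and dvobjectcontainer_id columns are the same ones targeted by the recursive
-- query in StorageUseServiceBean earlier in this patch.)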
+UPDATE dvobject SET tempStorageSize=tempStorageSize+o.combinedStorageSize +FROM (SELECT datasetobject.id, COALESCE(SUM(aux.fileSize),0) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject, datafile file, auxiliaryFile aux +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND aux.datafile_id = file.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + + -- ... and then we can repeat the same for collections, by setting the storage size -- to the sum of the storage sizes of the datasets *directly* in each collection: -- (no attempt is made yet to recursively count the sizes of all the child sub-collections) From 0c02b15aab711acbfb7f2c957c4482313b3997b9 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 5 Dec 2023 09:50:33 -0500 Subject: [PATCH 292/414] try QDR /logo endpoint --- .../edu/harvard/iq/dataverse/api/Datasets.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index af6059cf882..828ba218cc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1971,6 +1971,22 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) { } } + @GET + @Produces({ "image/png" }) + @Path("{id}/logo") + public Response getDatasetLogo(@PathParam("id") String idSupplied) { try { Dataset dataset = findDatasetOrDie(idSupplied); InputStream is = DatasetUtil.getLogoAsInputStream(dataset); if (is == null) { return notFound("Logo not available"); } return Response.ok(is).build(); } catch (WrappedResponse wr) { return notFound("Logo not available"); } } + // TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers.
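// (An illustrative usage sketch for the getDatasetLogo endpoint added above, not part of
// this commit. Assuming the standard /api/datasets base path, fetching a dataset's logo
// could look like:
//
//   curl "$SERVER_URL/api/datasets/$ID/logo" --output dataset-logo.png
//
// SERVER_URL and ID are placeholders. Since the new method carries no @AuthRequired
// annotation, as written it appears to serve the logo without an API token.)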
@POST @AuthRequired From 8c9f1242d53aea5ecc906bd4a2a3f5d12a884224 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:13:53 -0500 Subject: [PATCH 293/414] switch minio to creds jenkins expects #6783 --- docker-compose-dev.yml | 9 ++++----- .../java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 6bc50f7e764..98376e255dd 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -41,8 +41,8 @@ services: -Ddataverse.files.minio1.path-style-access=true -Ddataverse.files.minio1.upload-redirect=false -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=minioadmin - -Ddataverse.files.minio1.secret-key=minioadmin + -Ddataverse.files.minio1.access-key=4cc355_k3y + -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k35 ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) @@ -211,9 +211,8 @@ services: volumes: - minio_storage:/data environment: - # these are the defaults but are here for clarity - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin + MINIO_ROOT_USER: 4cc355_k3y + MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k35 command: server /data networks: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index f5e4ce6a794..daf04bb3d14 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -54,8 +54,8 @@ public static void setUp() { .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyLocalStack, secretKeyLocalStack))) .withEndpointConfiguration(new EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build(); - String accessKeyMinio = "minioadmin"; - String secretKeyMinio = "minioadmin"; + String accessKeyMinio = "4cc355_k3y"; + String secretKeyMinio = "s3cr3t_4cc355_k35"; s3minio = AmazonS3ClientBuilder.standard() // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local .withPathStyleAccessEnabled(Boolean.TRUE) From 6a7d8d1c6f76c8e54f9759f643204aa339c5bdd0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:33:19 -0500 Subject: [PATCH 294/414] make assertions about users #6783 --- .../java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index daf04bb3d14..7c1531cbfaf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -99,9 +99,10 @@ public void testNonDirectUpload() { String driverLabel = "MinIO"; Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); - UtilIT.makeSuperUser(superusername); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); Response storageDrivers = listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? 
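A quick, hedged way to confirm that the MinIO container accepts the new keys from the compose change above (port 9000 is MinIO's default and is an assumption here, since the hunk does not show the service's port mapping):

    AWS_ACCESS_KEY_ID=4cc355_k3y AWS_SECRET_ACCESS_KEY=s3cr3t_4cc355_k35 AWS_DEFAULT_REGION=us-east-1 \
      aws --endpoint-url http://localhost:9000 s3 ls

An empty listing (or a list of whatever buckets exist so far) means the credentials line up with what S3AccessIT expects.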
@@ -118,6 +119,7 @@ public void testNonDirectUpload() { //create user who will make a dataverse/dataset Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); @@ -208,9 +210,10 @@ public void testDirectUpload() { String driverId = "localstack1"; String driverLabel = "LocalStack"; Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); - UtilIT.makeSuperUser(superusername); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); Response storageDrivers = listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? @@ -227,6 +230,7 @@ public void testDirectUpload() { //create user who will make a dataverse/dataset Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); From b9f48913e498ec96ef8f5994c21e7bb549e747e0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:41:45 -0500 Subject: [PATCH 295/414] move methods to UtilIT #6783 --- .../harvard/iq/dataverse/api/S3AccessIT.java | 75 +++---------------- .../edu/harvard/iq/dataverse/api/UtilIT.java | 50 +++++++++++++ 2 files changed, 62 insertions(+), 63 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 7c1531cbfaf..1306c30d9c1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -103,7 +103,7 @@ public void testNonDirectUpload() { String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); - Response storageDrivers = listStorageDrivers(superuserApiToken); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? 
String drivers = """ @@ -127,18 +127,18 @@ public void testNonDirectUpload() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); originalStorageDriver.prettyPrint(); originalStorageDriver.then().assertThat() .body("data.message", equalTo("undefined")) .statusCode(200); - Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); setStorageDriverToS3.prettyPrint(); setStorageDriverToS3.then().assertThat() .statusCode(200); - Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); updatedStorageDriver.prettyPrint(); updatedStorageDriver.then().assertThat() .statusCode(200); @@ -214,7 +214,7 @@ public void testDirectUpload() { String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); - Response storageDrivers = listStorageDrivers(superuserApiToken); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? String drivers = """ @@ -238,18 +238,18 @@ public void testDirectUpload() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); originalStorageDriver.prettyPrint(); originalStorageDriver.then().assertThat() .body("data.message", equalTo("undefined")) .statusCode(200); - Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); setStorageDriverToS3.prettyPrint(); setStorageDriverToS3.then().assertThat() .statusCode(200); - Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); updatedStorageDriver.prettyPrint(); updatedStorageDriver.then().assertThat() .statusCode(200); @@ -275,7 +275,7 @@ public void testDirectUpload() { // // String fileId = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.id"); long size = 1000000000l; - Response getUploadUrls = getUploadUrls(datasetPid, size, apiToken); + Response getUploadUrls = UtilIT.getUploadUrls(datasetPid, size, apiToken); getUploadUrls.prettyPrint(); getUploadUrls.then().assertThat().statusCode(200); @@ -298,7 +298,7 @@ public void testDirectUpload() { String contentsOfFile = "foobar"; InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8)); - Response uploadFileDirect = uploadFileDirect(localhostUrl, inputStream); + Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); /* Direct upload to MinIO is failing with errors like this: @@ -357,7 +357,7 @@ 
public void testDirectUpload() { assertEquals(contentsOfFile, s3Object); System.out.println("direct download..."); - Response getHeaders = downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); + Response getHeaders = UtilIT.downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); for (Header header : getHeaders.getHeaders()) { System.out.println("direct download header: " + header); } @@ -371,7 +371,7 @@ public void testDirectUpload() { } catch (UnsupportedEncodingException ex) { } - Response downloadFile = downloadFromUrl(decodedDownloadUrl); + Response downloadFile = UtilIT.downloadFromUrl(decodedDownloadUrl); downloadFile.prettyPrint(); downloadFile.then().assertThat().statusCode(200); @@ -394,55 +394,4 @@ public void testDirectUpload() { } - //TODO: move these into UtilIT. They are here for now to avoid merge conflicts - static Response listStorageDrivers(String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/admin/dataverse/storageDrivers"); - } - - static Response getStorageDriver(String dvAlias, String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/admin/dataverse/" + dvAlias + "/storageDriver"); - } - - static Response setStorageDriver(String dvAlias, String label, String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .body(label) - .put("/api/admin/dataverse/" + dvAlias + "/storageDriver"); - } - - static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { - String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. - String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. - if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { - idInPath = ":persistentId"; - optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; - } - RequestSpecification requestSpecification = given(); - if (apiToken != null) { - requestSpecification = given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); - } - return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); - } - - static Response uploadFileDirect(String url, InputStream inputStream) { - return given() - .header("x-amz-tagging", "dv-state=temp") - .body(inputStream) - .put(url); - } - - static Response downloadFileNoRedirect(Integer fileId, String apiToken) { - return given().when().redirects().follow(false) - .get("/api/access/datafile/" + fileId + "?key=" + apiToken); - } - - static Response downloadFromUrl(String url) { - return given().get(url); - } - } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..12bb069424f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2361,6 +2361,56 @@ static Response deleteStorageSite(long storageSiteId) { .delete("/api/admin/storageSites/" + storageSiteId); } + static Response listStorageDrivers(String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/storageDrivers"); + } + + static Response getStorageDriver(String dvAlias, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response setStorageDriver(String dvAlias, String label, String apiToken) { + return given() + 
.header(API_TOKEN_HTTP_HEADER, apiToken) + .body(label) + .put("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { + String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); + } + + static Response uploadFileDirect(String url, InputStream inputStream) { + return given() + .header("x-amz-tagging", "dv-state=temp") + .body(inputStream) + .put(url); + } + + static Response downloadFileNoRedirect(Integer fileId, String apiToken) { + return given().when().redirects().follow(false) + .get("/api/access/datafile/" + fileId + "?key=" + apiToken); + } + + static Response downloadFromUrl(String url) { + return given().get(url); + } + static Response metricsDataversesToMonth(String yyyymm, String queryParams) { String optionalYyyyMm = ""; if (yyyymm != null) { From 7349ed9f754e05ff7b16a24ea8f3c24c060ed593 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 10:43:38 -0500 Subject: [PATCH 296/414] get logo, picking 48px size for datafile thumbs FWIW: QDR generates a 400px version here and then uses styling to fit the page. Not sure what the motivation for that was without digging. --- .../iq/dataverse/dataset/DatasetUtil.java | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 096f1f87acc..ccf861ebdc8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -411,6 +411,69 @@ public static InputStream getThumbnailAsInputStream(Dataset dataset, int size) { return nonDefaultDatasetThumbnail; } } + + public static InputStream getLogoAsInputStream(Dataset dataset) { + if (dataset == null) { + return null; + } + StorageIO dataAccess = null; + + try { + dataAccess = DataAccess.getStorageIO(dataset); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + } + + InputStream in = null; + try { + if (dataAccess == null) { + logger.warning( + "getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier()); + } else { + in = dataAccess.getAuxFileAsInputStream(datasetLogoFilenameFinal); + } + } catch (IOException ex) { + logger.fine( + "Dataset-level thumbnail file does not exist, or failed to open; will try to find an image file that can be used as the thumbnail."); + } + + if (in == null) { + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + if (dataset.isUseGenericThumbnail()) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo and is 'Use Generic'."); + return null; + } else { + thumbnailFile = attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + 
logger.fine("Dataset (id :" + dataset.getId() + + ") does not have a logo available that could be selected automatically."); + return null; + } else { + + } + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + + try { + in = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(), + ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE).getInputStream(); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + ioex.printStackTrace(); + } + + } + return in; + } /** * The dataset logo is the file that a user uploads which is *not* one of From 6f1cd087624fea70a1c37425aacaf05c9d7ba0bf Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 5 Dec 2023 15:53:21 +0000 Subject: [PATCH 297/414] Added: checks before calling getFileMetadatas on canDownloadAtLeastOneFile method in PermissionServiceBean --- .../iq/dataverse/PermissionServiceBean.java | 51 ++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 9e6628617ce..2e4627576c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -41,6 +41,9 @@ import java.util.stream.Collectors; import static java.util.stream.Collectors.toList; import jakarta.persistence.Query; +import jakarta.persistence.criteria.CriteriaBuilder; +import jakarta.persistence.criteria.CriteriaQuery; +import jakarta.persistence.criteria.Root; /** * Your one-stop-shop for deciding which user can do what action on which @@ -837,12 +840,56 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } - public boolean canDownloadAtLeastOneFile(User requestUser, DatasetVersion datasetVersion) { + /** + * Checks if a User can download at least one file of the target DatasetVersion. + * + * @param user User to check + * @param datasetVersion DatasetVersion to check + * @return boolean indicating whether the user can download at least one file or not + */ + public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersion) { + if (user.isSuperuser()) { + return true; + } + if (hasReleasedFiles(datasetVersion)) { + return true; + } for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { - if (userOn(requestUser, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + if (userOn(user, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { return true; } } return false; } + + /** + * Checks if a DatasetVersion has released files. + * + * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing + * the search on a particular file, it searches for the total number of files in the target version that are present + * in the released version. 
+ * + * @param targetDatasetVersion DatasetVersion to check + * @return boolean indicating whether the dataset version has released files or not + */ + private boolean hasReleasedFiles(DatasetVersion targetDatasetVersion) { + Dataset targetDataset = targetDatasetVersion.getDataset(); + if (!targetDataset.isReleased()) { + return false; + } + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root datasetVersionRoot = criteriaQuery.from(DatasetVersion.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + criteriaBuilder.equal(fileMetadataRoot.get("dataFile").get("restricted"), false), + criteriaBuilder.equal(datasetVersionRoot.get("dataset"), targetDataset), + criteriaBuilder.equal(datasetVersionRoot.get("versionState"), DatasetVersion.VersionState.RELEASED), + fileMetadataRoot.in(targetDatasetVersion.getFileMetadatas()), + fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas")))); + Long result = em.createQuery(criteriaQuery).getSingleResult(); + return result > 0; + } } From c194d74b2029917de050fe5d40b237b23bddf3ab Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 10:59:46 -0500 Subject: [PATCH 298/414] Clarified the sentence about the initial deployment in the release note. #8549 --- doc/release-notes/8549-collection-quotas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/8549-collection-quotas.md b/doc/release-notes/8549-collection-quotas.md index 29b84213cfb..b3635d0c5a1 100644 --- a/doc/release-notes/8549-collection-quotas.md +++ b/doc/release-notes/8549-collection-quotas.md @@ -1,3 +1,3 @@ This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the deployment. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. 
On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first deployment of 6.1. From cf7e664e626994419ca3a1c80785290da7efe683 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 12:02:41 -0500 Subject: [PATCH 299/414] moved the EntityManager calls from a command to the service #8549 --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 8 ++++++++ .../engine/command/impl/DeleteCollectionQuotaCommand.java | 4 +--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 487215c7a65..b6e666e8058 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -935,4 +935,12 @@ public void saveStorageQuota(Dataverse target, Long allocation) { } em.flush(); } + + public void disableStorageQuota(StorageQuota storageQuota) { + if (storageQuota != null && storageQuota.getAllocation() != null) { + storageQuota.setAllocation(null); + em.merge(storageQuota); + em.flush(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index 4015228366b..c0f863686da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -46,9 +46,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException { StorageQuota storageQuota = targetDataverse.getStorageQuota(); if (storageQuota != null && storageQuota.getAllocation() != null) { - storageQuota.setAllocation(null); - ctxt.em().merge(storageQuota); - ctxt.em().flush(); + ctxt.dataverses().disableStorageQuota(storageQuota); } // ... and if no quota was enabled on the collection - nothing to do = success } From dfa49c3720f866f36df0b6cd712f1c5144dfee44 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 14:31:32 -0500 Subject: [PATCH 300/414] rename flyway script --- ...thumb-failures.sql => V6.0.0.6__9506-track-thumb-failures.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.0.0.5__9506-track-thumb-failures.sql => V6.0.0.6__9506-track-thumb-failures.sql} (100%) From 70a3442cc9a6c672ef8a553be8b279b3b8ea1b52 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 14:36:21 -0500 Subject: [PATCH 301/414] updated aux.
file service bean #8549 --- .../dataverse/AuxiliaryFileServiceBean.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 8c96f98ce39..363622ba3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -2,6 +2,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -46,6 +47,8 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable { @EJB private SystemConfig systemConfig; + @EJB + StorageUseServiceBean storageUseService; public AuxiliaryFile find(Object pk) { return em.find(AuxiliaryFile.class, pk); @@ -126,6 +129,13 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } dataFile.getAuxiliaryFiles().add(auxFile); } + // We've just added this file to storage; increment the StorageUse + // record if needed. + if (auxFile.getFileSize() != null + && auxFile.getFileSize() > 0 + && dataFile.getOwner() != null ) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), auxFile.getFileSize()); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -181,6 +191,7 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (af == null) { throw new FileNotFoundException(); } + Long auxFileSize = af.getFileSize(); em.remove(af); StorageIO storageIO; storageIO = dataFile.getStorageIO(); @@ -188,6 +199,14 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (storageIO.isAuxObjectCached(auxExtension)) { storageIO.deleteAuxObject(auxExtension); } + // We've just deleted this file from storage; update the StorageUse + // record if needed. + if (auxFileSize != null + && auxFileSize > 0 + && dataFile.getOwner() != null) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), (0L - auxFileSize)); + } + } public List findAuxiliaryFiles(DataFile dataFile) { From c54a85fca9377b74efc0e74e8a70a6de2f6fccc4 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 5 Dec 2023 14:52:23 -0500 Subject: [PATCH 302/414] #9464 add caveats to release note. --- doc/release-notes/9464-json-validation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md index 4b08f2ca9dd..f104263ba35 100644 --- a/doc/release-notes/9464-json-validation.md +++ b/doc/release-notes/9464-json-validation.md @@ -1,3 +1,3 @@ -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. (Issue #9464 and #9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. 
The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From 2379828c2737260901b23020a436f5cab6cc962a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 5 Dec 2023 15:05:12 -0500 Subject: [PATCH 303/414] Update native-api.rst --- doc/sphinx-guides/source/api/native-api.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 2d37c3b07ae..29aa7c880ac 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -510,7 +510,9 @@ The fully expanded example above (without environment variables) looks like this Retrieve a Dataset JSON Schema for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset: +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This +first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled +vocabulary and more robust dataset field format testing: .. code-block:: bash @@ -535,7 +537,8 @@ While it is recommended to download a copy of the JSON Schema from the collectio Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Validates a dataset JSON file customized for a given collection prior to creating the dataset: +Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for JSON formatting +and the presence of required elements: .. code-block:: bash From dd2d9726e3125975493fa6dbf70578d76fa5f07c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:47:04 -0500 Subject: [PATCH 304/414] globus store options --- .../source/installation/config.rst | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7b32da8f6c3..e0e4d4cd89e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -499,14 +499,14 @@ Logging & Slow Performance .. _file-storage: -File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores ------------------------------------------------------------------------------------------------------ +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores and/or Globus Stores +-------------------------------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``.
This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. -A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. +A Dataverse installation can alternately store files in a Swift or S3-compatible object store, or on a Globus endpoint, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. -A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web-accessible trusted remote store. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web or Globus accessible trusted remote store. A Dataverse installation can be configured to allow out of band upload by setting the ``dataverse.files.\.upload-out-of-band`` JVM option to ``true``. By default, Dataverse supports uploading files via the :ref:`add-file-api`. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). @@ -958,7 +958,7 @@ Once you have configured a trusted remote store, you can point your users to the dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) dataverse.files..label **Required** label to be shown in the UI for this storage. (none) dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) - dataverse.files..base-store **Optional** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 @@ -967,6 +967,46 @@ Once you have configured a trusted remote store, you can point your users to the =========================================== ================== ========================================================================== =================== +.. _globus-storage: + +Globus Storage ++++++++++++++ + +Globus stores allow Dataverse to manage files stored in Globus endpoints or to reference files in remote Globus endpoints, with users leveraging Globus to transfer files to/from Dataverse (rather than using HTTP/HTTPS). +See :doc:`/developers/big-data-support` for additional information on how to use a Globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. + +In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores, along with options to specify and access the Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g.
metadata exports, thumbnails, auxiliary files, etc.). +These and other available options are described in the table below. + +There are two types of Globus stores: +- managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse +- remote - where Dataverse references files that remain on trusted remote Globus endpoints + +For managed stores, there are two variants, connecting to standard/file-based Globus endpoints and to endpoints using an underlying S3 store via the Globus S3 Connector. +With the former, Dataverse has no direct access to the file contents, and functionality related to ingest, fixity hash validation, etc. is not available. With the latter, Dataverse can access files internally via S3 and the functionality supported is similar to that when using S3 direct upload. + +Once you have configured a Globus store, it is recommended that you install the `dataverse-globus app `_ to allow transfers in/out of Dataverse to be initiated via the Dataverse user interface. Alternately, you can point your users to the :doc:`/developers/globus-api` for information about API support. + +.. table:: + :align: left + + ======================================================= ================== ========================================================================== =================== + JVM Option Value Description Default value + ======================================================= ================== ========================================================================== =================== + dataverse.files..type ``globus`` **Required** to mark this storage as globus enabled. (none) + dataverse.files..label **Required** label to be shown in the UI for this storage. (none) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) + dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) + dataverse.files..managed ``true``/``false`` Whether Dataverse manages an associated Globus endpoint ``false`` + dataverse.files..transfer-endpoint-with-basepath The *managed* Globus endpoint id and associated base path for file storage (none) + dataverse.files..globus-token A Globus token (base64 encoded : + for a managed store) - using a microprofile alias is recommended (none) + dataverse.files..reference-endpoints-with-basepaths A comma separated list of *remote* trusted Globus endpoint id/s (none) + dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be true for S3 Connector-based *managed* stores ``false`` + + ======================================================= ================== ========================================================================== =================== + ..
_temporary-file-storage: Temporary Upload File Storage From 4d7818a7be615033bd00261a6a0951c703c0ad3b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:59:04 -0500 Subject: [PATCH 305/414] merge miss --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 13ec049fa0a..8afc365417e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -87,7 +87,7 @@ /* Amazon AWS S3 driver */ -public class S3AccessIO extends StorageIO implements GlobusAccessibleStore { +public class S3AccessIO extends StorageIO { private static final Config config = ConfigProvider.getConfig(); private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); @@ -1194,7 +1194,6 @@ private static AmazonS3 getClient(String driverId) { * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found. */ - String s3profile = getConfigParamForDriver(driverId, PROFILE,"default"); ArrayList providers = new ArrayList<>(); From ceacf7e92c045a61b96205536f442dc48142cb2a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:59:40 -0500 Subject: [PATCH 306/414] add a stub globus api page since it is referenced in the config doc --- .../source/developers/globus-api.rst | 282 ++++++++++++++++++ doc/sphinx-guides/source/developers/index.rst | 1 + 2 files changed, 283 insertions(+) create mode 100644 doc/sphinx-guides/source/developers/globus-api.rst diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst new file mode 100644 index 00000000000..2775ffd2142 --- /dev/null +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -0,0 +1,282 @@ +Globus Transfer API +=================== + +The Globus API addresses three use cases: +* Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) +* Reference of files that will remain in a remote Globus endpoint +* Transfer from a Dataverse-managed Globus endpoint + +The ability for Dataverse to interact with Globus endpoints is configured via +Direct upload involves a series of three activities, each involving interacting with the server for a Dataverse installation: + +* Requesting initiation of a transfer from the server +* Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3 +* A call to the server to register the file/files as part of the dataset/replace a file in the dataset or to cancel the transfer + +This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. +Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.) 
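+A minimal cleanup sketch for finding such stranded objects, assuming the AWS CLI is configured for the store's bucket (``demo-dataverse-bucket`` here, matching the examples below) and that uploads carry the ``dv-state=temp`` tag shown in the PUT example below; whether to delete what it reports is an installation-specific decision:
+
+.. code-block:: bash
+
+  # report objects still tagged as temporary uploads
+  aws s3api list-objects-v2 --bucket demo-dataverse-bucket --query 'Contents[].Key' --output text \
+    | tr '\t' '\n' | while read -r key; do
+      tag=$(aws s3api get-object-tagging --bucket demo-dataverse-bucket --key "$key" \
+        --query "TagSet[?Key=='dv-state'].Value" --output text)
+      [ "$tag" = "temp" ] && echo "temporary object: $key"
+    done
+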
+ + +Requesting Direct Upload of a DataFile +-------------------------------------- +To initiate a transfer of a file to S3, make a call to the Dataverse installation indicating the size of the file to upload. The response will include a pre-signed URL(s) that allow the client to transfer the file. Pre-signed URLs include a short-lived token authorizing the action represented by the URL. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export SIZE=1000000000 + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=$SIZE" + +The response to this call, assuming direct uploads are enabled, will be one of two forms: + +Single URL: when the file is smaller than the size at which uploads must be broken into multiple parts + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "url":"...", + "partSize":1073741824, + "storageIdentifier":"s3://demo-dataverse-bucket:177883619b8-892ca9f7112e" + } + } + +Multiple URLs: when the file must be uploaded in multiple parts. The part size is set by the Dataverse installation and, for AWS-based storage, range from 5 MB to 5 GB + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "urls":{ + "1":"...", + "2":"...", + "3":"...", + "4":"...", + "5":"..." + } + "abort":"/api/datasets/mpupload?...", + "complete":"/api/datasets/mpupload?..." + "partSize":1073741824, + "storageIdentifier":"s3://demo-dataverse-bucket:177883b000e-49cedef268ac" + } + +In the example responses above, the URLs, which are very long, have been omitted. These URLs reference the S3 server and the specific object identifier that will be used, starting with, for example, https://demo-dataverse-bucket.s3.amazonaws.com/10.5072/FK2FOQPJS/177883b000e-49cedef268ac?... + +The client must then use the URL(s) to PUT the file, or if the file is larger than the specified partSize, parts of the file. + +In the single part case, only one call to the supplied URL is required: + +.. code-block:: bash + + curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" + + +In the multipart case, the client must send each part and collect the 'eTag' responses from the server. The calls for this are the same as the one for the single part case except that each call should send a slice of the total file, with the last part containing the remaining bytes. +The responses from the S3 server for these calls will include the 'eTag' for the uploaded part. + +To successfully conclude the multipart upload, the client must call the 'complete' URI, sending a json object including the part eTags: + +.. code-block:: bash + + curl -X PUT "$SERVER_URL/api/datasets/mpload?..." -d '{"1":"","2":"","3":"","4":"","5":""}' + +If the client is unable to complete the multipart upload, it should call the abort URL: + +.. code-block:: bash + + curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." + + +.. _direct-add-to-dataset-api: + +Adding the Uploaded file to the Dataset +--------------------------------------- + +Once the file exists in the s3 bucket, a final API call is needed to add it to the Dataset. This call is the same call used to upload a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. 
For direct uploads, the jsonData object must also include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +To add multiple Uploaded Files to the Dataset +--------------------------------------------- + +Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: + +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ + {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + + +Replacing an existing file in the Dataset +----------------------------------------- + +Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512. +Note that the API call does not validate that the file matches the hash value supplied. If a Dataverse instance is configured to validate file fixity hashes at publication time, a mismatch would be caught at that time and cause publication to fail. + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_IDENTIFIER=5072 + export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + +.. code-block:: + + { + "status": "OK", + "data": { + "Files": [ + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", + "fileDetails": { + "fileToReplaceId": 10, + "description": "My description.", + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "restrict": "false", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "fileName": "file1.Bin", + "mimeType": "application/octet-stream", + "checksum": { + "@type": "SHA-1", + "@value": "123456" + } + } + }, + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "successMessage": "Replaced successfully in the dataset", + "fileDetails": { + "description": "My description.", + "label": "file2.txt", + "restricted": false, + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "dataFile": { + "persistentId": "", + "pidURL": "", + "filename": "file2.txt", + "contentType": "text/plain", + "filesize": 2407, + "description": "My description.", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "rootDataFileId": 11, + "previousDataFileId": 11, + "checksum": { + "type": "SHA-1", + "value": "123789" + } + } + } + } + ], + "Result": { + "Total number of files": 2, + "Number of files successfully replaced": 1 + } + } + } + + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. 
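+
+As a convenience (a sketch, not part of the API), the per-file results in a response like the one above can be summarized with ``jq``, assuming the response has been saved to a hypothetical ``response.json``:
+
+.. code-block:: bash
+
+  # list files whose replacement failed, with their error messages
+  jq -r '.data.Files[] | select(.errorMessage) | "\(.fileDetails.fileName): \(.errorMessage)"' response.json
+  # show the overall success count
+  jq -r '.data.Result["Number of files successfully replaced"]' response.json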
diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 60d97feeef9..458a78a6c95 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -39,6 +39,7 @@ Developer Guide big-data-support aux-file-support s3-direct-upload-api + globus-api dataset-semantic-metadata-api dataset-migration-api workflows From 03a4c77155934060c33c33ed27ea2f7628301e91 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 10:58:33 +0000 Subject: [PATCH 307/414] Refactor: shortcut on datafile permission check --- .../harvard/iq/dataverse/PermissionServiceBean.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 2e4627576c6..107024bcfb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -851,11 +851,13 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio if (user.isSuperuser()) { return true; } - if (hasReleasedFiles(datasetVersion)) { + if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { - if (userOn(user, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + DataFile dataFile = fileMetadata.getDataFile(); + Set ras = new HashSet<>(groupService.groupsFor(user, dataFile)); + if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } } @@ -863,7 +865,7 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio } /** - * Checks if a DatasetVersion has released files. + * Checks if a DatasetVersion has unrestricted released files. 
* * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing * the search on a particular file, it searches for the total number of files in the target version that are present @@ -872,7 +874,7 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio * @param targetDatasetVersion DatasetVersion to check * @return boolean indicating whether the dataset version has released files or not */ - private boolean hasReleasedFiles(DatasetVersion targetDatasetVersion) { + private boolean hasUnrestrictedReleasedFiles(DatasetVersion targetDatasetVersion) { Dataset targetDataset = targetDatasetVersion.getDataset(); if (!targetDataset.isReleased()) { return false; From 326b784da752091bf4c7b3bf4112ebfc327acb69 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 10:59:08 +0000 Subject: [PATCH 308/414] Refactor: variable extracted in isPublicallyDownloadable --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 107024bcfb9..1c568e83143 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -451,8 +451,9 @@ private boolean isPublicallyDownloadable(DvObject dvo) { if (!df.isRestricted()) { if (df.getOwner().getReleasedVersion() != null) { - if (df.getOwner().getReleasedVersion().getFileMetadatas() != null) { - for (FileMetadata fm : df.getOwner().getReleasedVersion().getFileMetadatas()) { + List fileMetadatas = df.getOwner().getReleasedVersion().getFileMetadatas(); + if (fileMetadatas != null) { + for (FileMetadata fm : fileMetadatas) { if (df.equals(fm.getDataFile())) { return true; } From 16c685dc30601d8a8b0140cec4b8621e1fe33a99 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:22:06 +0000 Subject: [PATCH 309/414] Changed: passing DataverseRequest instead of User to canDownloadAtLeastOneFile --- .../harvard/iq/dataverse/PermissionServiceBean.java | 11 ++++++----- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 1c568e83143..e87809ada56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -844,20 +844,21 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio /** * Checks if a User can download at least one file of the target DatasetVersion. 
* - * @param user User to check + * @param dataverseRequest DataverseRequest to check * @param datasetVersion DatasetVersion to check * @return boolean indicating whether the user can download at least one file or not */ - public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersion) { - if (user.isSuperuser()) { + public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { + if (dataverseRequest.getUser().isSuperuser()) { return true; } if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } - for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { + List fileMetadatas = datasetVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : fileMetadatas) { DataFile dataFile = fileMetadata.getDataFile(); - Set ras = new HashSet<>(groupService.groupsFor(user, dataFile)); + Set ras = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index a9cfefc33d8..6a1e11e690b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4145,7 +4145,7 @@ public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext cr @Context HttpHeaders headers) { return response(req -> { DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); - return ok(permissionService.canDownloadAtLeastOneFile(getRequestUser(crc), datasetVersion)); + return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); }, getRequestUser(crc)); } } From 8ca2338723a0ec1a57a9affc923fe65229009909 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:22:51 +0000 Subject: [PATCH 310/414] Fixed: method doc --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index e87809ada56..359e8823fce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -842,7 +842,7 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio } /** - * Checks if a User can download at least one file of the target DatasetVersion. + * Checks if a DataverseRequest can download at least one file of the target DatasetVersion. 
* * @param dataverseRequest DataverseRequest to check * @param datasetVersion DatasetVersion to check From 96cd5c9d55437180cfa256df38b0d5990c97ec6c Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:24:49 +0000 Subject: [PATCH 311/414] Added: explanatory comment --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 359e8823fce..6dc943f1ca8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -852,6 +852,7 @@ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, Data if (dataverseRequest.getUser().isSuperuser()) { return true; } + // This is a shortcut to avoid having to check version files if the condition is met if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } From 3c1820b060b303da2bfa97132667ceccb5d5e977 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:48:09 +0000 Subject: [PATCH 312/414] Added: includeDeaccessioned query param to getCanDownloadAtLeastOneFile API endpoint --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 6a1e11e690b..579f4f78fe1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4141,10 +4141,11 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response(req -> { - DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); }, getRequestUser(crc)); } From 811d79a7f8d017745fcfd782b233ec583d3669e2 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 08:33:38 -0500 Subject: [PATCH 313/414] change minio access key, more l33t #6783 --- docker-compose-dev.yml | 2 +- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 98376e255dd..e68215d53d2 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -212,7 +212,7 @@ services: - minio_storage:/data environment: MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k35 + MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y command: server /data networks: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 1306c30d9c1..41446349093 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -55,7 +55,7 @@ public static void setUp() { .withEndpointConfiguration(new 
EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build(); String accessKeyMinio = "4cc355_k3y"; - String secretKeyMinio = "s3cr3t_4cc355_k35"; + String secretKeyMinio = "s3cr3t_4cc355_k3y"; s3minio = AmazonS3ClientBuilder.standard() // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local .withPathStyleAccessEnabled(Boolean.TRUE) From a81ad72a0896073e043ee57848e571d7a3754a8a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 10:50:46 -0500 Subject: [PATCH 314/414] comment out optional listing of buckets #6783 --- .../harvard/iq/dataverse/api/S3AccessIT.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 41446349093..74150ca120a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -62,16 +62,15 @@ public static void setUp() { .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyMinio, secretKeyMinio))) .withEndpointConfiguration(new EndpointConfiguration("http://localhost:9000", Regions.US_EAST_1.getName())).build(); - System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME); - for (Bucket bucket : s3localstack.listBuckets()) { - System.out.println("bucket: " + bucket); - } - - System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME); - for (Bucket bucket : s3minio.listBuckets()) { - System.out.println("bucket: " + bucket); - } - +// System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3localstack.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } +// +// System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3minio.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } // create bucket if it doesn't exist // Note that we create the localstack bucket with conf/localstack/buckets.sh // because we haven't figured out how to create it properly in Java. From 0bd9f139e5dca2851ca88ed12c5e31af9c5bbfe9 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:01:04 -0500 Subject: [PATCH 315/414] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index c2b52ab34b8..06a3e01f7af 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -1,6 +1,6 @@ # Dataverse 6.1 -(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.1 rather than the list of releases, which will cut them off. This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. 
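A quick way to confirm that the corrected credentials in the two patches above agree, assuming the docker-compose-dev.yml MinIO service is running and reachable on localhost:9000 (the endpoint S3AccessIT uses) and the AWS CLI is installed (a sketch, not part of the patches):

.. code-block:: bash

   # list buckets on the dev MinIO using the fixed root credentials
   AWS_ACCESS_KEY_ID=4cc355_k3y AWS_SECRET_ACCESS_KEY=s3cr3t_4cc355_k3y AWS_DEFAULT_REGION=us-east-1 \
     aws --endpoint-url http://localhost:9000 s3 ls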
From c97d7b55e2932dacaa19e4e3ac403c88a25bd2ee Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:01:56 -0500 Subject: [PATCH 316/414] globus api doc --- .../source/developers/globus-api.rst | 348 ++++++++---------- 1 file changed, 149 insertions(+), 199 deletions(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 2775ffd2142..6a94f220dc2 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -6,277 +6,227 @@ The Globus API addresses three use cases: * Reference of files that will remain in a remote Globus endpoint * Transfer from a Dataverse-managed Globus endpoint -The ability for Dataverse to interact with Globus endpoints is configured via -Direct upload involves a series of three activities, each involving interacting with the server for a Dataverse installation: +The ability for Dataverse to interact with Globus endpoints is configured via a Globus store - see :ref:`globus-storage`. -* Requesting initiation of a transfer from the server -* Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3 -* A call to the server to register the file/files as part of the dataset/replace a file in the dataset or to cancel the transfer +Globus transfers (or referencing a remote endpoint) for upload and download involve a series of steps. These can be accomplished using the Dataverse and Globus APIs. (These are used internally by the `dataverse-globus app `_ when transfers are done via the Dataverse UI.) -This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. -Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.) +Requesting Upload or Download Parameters +---------------------------------------- - -Requesting Direct Upload of a DataFile -------------------------------------- -To initiate a transfer of a file to S3, make a call to the Dataverse installation indicating the size of the file to upload. The response will include a pre-signed URL(s) that allow the client to transfer the file. Pre-signed URLs include a short-lived token authorizing the action represented by the URL. +The first step in preparing for a Globus transfer/reference operation is to request the parameters relevant for a given dataset: .. code-block:: bash - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export SIZE=1000000000 - - curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=$SIZE" -The response to this call, assuming direct uploads are enabled, will be one of two forms: + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusUploadParameters?locale=$LOCALE" -Single URL: when the file is smaller than the size at which uploads must be broken into multiple parts +The response will be of the form: ..
code-block:: bash { - "status":"OK", - "data":{ - "url":"...", - "partSize":1073741824, - "storageIdentifier":"s3://demo-dataverse-bucket:177883619b8-892ca9f7112e" + "status": "OK", + "data": { + "queryParameters": { + "datasetId": 29, + "siteUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com", + "datasetVersion": ":draft", + "dvLocale": "en", + "datasetPid": "doi:10.5072/FK2/ILLPXE", + "managed": "true", + "endpoint": "d8c42580-6528-4605-9ad8-116a61982644" + }, + "signedUrls": [ + { + "name": "requestGlobusTransferPaths", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/requestGlobusUploadPaths?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=63ac4bb748d12078dded1074916508e19e6f6b61f64294d38e0b528010b07d48783cf2e975d7a1cb6d4a3c535f209b981c7c6858bc63afdfc0f8ecc8a139b44a", + "timeOut": 300 + }, + { + "name": "addGlobusFiles", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/addGlobusFiles?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=2aaa03f6b9f851a72e112acf584ffc0758ed0cc8d749c5a6f8c20494bb7bc13197ab123e1933f3dde2711f13b347c05e6cec1809a8f0b5484982570198564025", + "timeOut": 300 + }, + { + "name": "getDatasetMetadata", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft?until=2023-11-22T01:52:03.649&user=dataverseAdmin&method=GET&token=1878d6a829cd5540e89c07bdaf647f1bea5314cc7a55433b0b506350dd330cad61ade3714a8ee199a7b464fb3b8cddaea0f32a89ac3bfc4a86cd2ea3004ecbb8", + "timeOut": 300 + }, + { + "name": "getFileListing", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft/files?until=2023-11-22T01:52:03.650&user=dataverseAdmin&method=GET&token=78e8ca8321624f42602af659227998374ef3788d0feb43d696a0e19086e0f2b3b66b96981903a1565e836416c504b6248cd3c6f7c2644566979bd16e23a99622", + "timeOut": 300 + } + ] + } } - } -Multiple URLs: when the file must be uploaded in multiple parts. The part size is set by the Dataverse installation and, for AWS-based storage, range from 5 MB to 5 GB +The response includes the id for the Globus endpoint to use along with several signed URLs. -.. code-block:: bash +The getDatasetMetadata and getFileListing URLs are just signed versions of the standard Dataset metadata and file listing API calls. The other two are Globus specific. - { - "status":"OK", - "data":{ - "urls":{ - "1":"...", - "2":"...", - "3":"...", - "4":"...", - "5":"..." - } - "abort":"/api/datasets/mpupload?...", - "complete":"/api/datasets/mpupload?..." - "partSize":1073741824, - "storageIdentifier":"s3://demo-dataverse-bucket:177883b000e-49cedef268ac" - } +If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but +the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the +requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are +described further below. + +The call to set up for a transfer out (download) is similar: -In the example responses above, the URLs, which are very long, have been omitted.
These URLs reference the S3 server and the specific object identifier that will be used, starting with, for example, https://demo-dataverse-bucket.s3.amazonaws.com/10.5072/FK2FOQPJS/177883b000e-49cedef268ac?... +.. code-block:: bash -The client must then use the URL(s) to PUT the file, or if the file is larger than the specified partSize, parts of the file. + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusDownloadParameters?locale=$LOCALE" -In the single part case, only one call to the supplied URL is required: +Note that this API call supports an additional downloadId query parameter. This is only used when the dataverse-globus app is called from the Dataverse user interface. There is no need to use it when calling the API directly. -.. code-block:: bash +The returned response includes the same getDatasetMetadata and getFileListing URLs as in the upload case and includes "monitorGlobusDownload" and "requestGlobusDownload" URLs. The response will also indicate whether the store is "managed" and will provide the "endpoint" from which downloads can be made. - curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" + +Performing an Upload/Transfer In +-------------------------------- -In the multipart case, the client must send each part and collect the 'eTag' responses from the server. The calls for this are the same as the one for the single part case except that each call should send a slice of the total file, with the last part containing the remaining bytes. -The responses from the S3 server for these calls will include the 'eTag' for the uploaded part. +The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer or to reference files (based on the "managed" parameter). -To successfully conclude the multipart upload, the client must call the 'complete' URI, sending a json object including the part eTags: +Once the user identifies which files are to be added, the requestGlobusTransferPaths or requestGlobusReferencePaths URLs can be called. These both reference the same API call but must be used with different entries in the JSON body sent: .. code-block:: bash - curl -X PUT "$SERVER_URL/api/datasets/mpload?..." -d '{"1":"","2":"","3":"","4":"","5":""}' -If the client is unable to complete the multipart upload, it should call the abort URL: + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export LOCALE=en-US + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUpload" -.. code-block:: bash - - curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." - +Note that when using the dataverse-globus app or the return from the previous call, the URL for this call will be signed and no API_TOKEN is needed. -.. _direct-add-to-dataset-api: +In the managed case, the JSON body sent must include the id of the Globus user that will perform the transfer and the number of files that will be transferred: -Adding the Uploaded file to the Dataset --------------------------------------- +.. code-block:: bash + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "numberOfFiles":2 + } -Once the file exists in the s3 bucket, a final API call is needed to add it to the Dataset.
This call is the same call used to upload a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +In the remote reference case, the JSON body sent must include the Globus endpoint/paths that will be referenced: -* "storageIdentifier" - String, as specified in prior calls -* "fileName" - String -* "mimeType" - String -* fixity/checksum: either: +.. code-block:: bash + { + "referencedFiles":[ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + ] + } + +The response will include a JSON object. In the managed case, the map is from newly assigned file storageidentifiers to specific paths on the managed Globus endpoint: +.. code-block:: bash - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + { + "status":"OK", + "data":{ + "globusm://18b49d3688c-62137dcb06e4":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-62137dcb06e4", + "globusm://18b49d3688c-5c17d575e820":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-5c17d575e820" + } + } -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +In the managed case, the specified Globus principal is granted write permission to the specified endpoint/path, +which will allow initiation of a transfer from the external endpoint to the managed endpoint using the Globus API. +The permission will be revoked if the transfer is not started and the next call to Dataverse to finish the transfer is not made within a short time (configurable, default of 5 minutes). + +In the remote/reference case, the map is from the initially supplied endpoint/paths to the newly assigned file storageidentifiers: .. code-block:: bash - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + { + "status":"OK", + "data":{ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt":"globus://18bf8c933f4-ed2661e7d19b//d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + } + } -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide.
- -To add multiple Uploaded Files to the Dataset ---------------------------------------------- + { + "status":"OK", + "data":{ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt":"globus://18bf8c933f4-ed2661e7d19b//d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + } + } -Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +Adding Files to the Dataset +--------------------------- -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +In the managed case, once a Globus transfer has been initiated a final API call is made to Dataverse to provide it with the task identifier of the transfer and information about the files being transferred: .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ + "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ + {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. 
Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. +Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. +With this information, Dataverse will begin to monitor the transfer and when it completes, will add all files for which the transfer succeeded. +As the transfer can take significant time and the API call is asynchronous, the only way to determine if the transfer succeeded via API is to use the standard calls to check the dataset lock state and contents. -Replacing an existing file in the Dataset ------------------------------------------ +Once the transfer completes, Dataverse will remove the write permission for the principal. -Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: +Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. -* "storageIdentifier" - String, as specified in prior calls -* "fileName" - String -* "mimeType" - String -* fixity/checksum: either: +In the remote/reference case, where there is no transfer to monitor, the standard /addFiles API call (see :ref:`direct-add-to-dataset-api`) is used instead. There are no changes for the Globus case. - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +Downloading/Transfer Out Via Globus +----------------------------------- -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512. -Note that the API call does not validate that the file matches the hash value supplied. If a Dataverse instance is configured to validate file fixity hashes at publication time, a mismatch would be caught at that time and cause publication to fail. +To begin downloading files, the requestGlobusDownload URL is used: .. 
code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export FILE_IDENTIFIER=5072 - export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusDownload" -Replacing multiple existing files in the Dataset ------------------------------------------------- +The JSON body sent should include a list of file ids to download and, for a managed endpoint, the Globus principal that will make the transfer: -Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: +.. code-block:: bash + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "fileIds":[60, 61] + } + +Note that this API call takes an optional downloadId parameter that is used with the dataverse-globus app. When downloadId is included, the list of fileIds is not needed. -* "fileToReplaceId" - the id of the file being replaced -* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: +The response is a JSON object mapping the requested file Ids to Globus endpoint/paths. In the managed case, the principal will have been given read permissions for the specified paths: - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +.. 
code-block:: bash + { + "status":"OK", + "data":{ + "60": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-92b8e168090e", + "61": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-c8d81569305c" + } + } -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +For the remote case, the use can perform the transfer without further contact with Dataverse. In the managed case, the user must initiate the transfer via the Globus API and then inform Dataverse. +Dataverse will then monitor the transfer and revoke the read permission when the transfer is complete. (Not making this last call could result in failure of the transfer.) .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/monitorGlobusDownload" + +The JSON body sent just contains the task identifier for the transfer: -.. code-block:: +.. 
code-block:: bash { - "status": "OK", - "data": { - "Files": [ - { - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", - "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", - "fileDetails": { - "fileToReplaceId": 10, - "description": "My description.", - "directoryLabel": "data/subdir1", - "categories": [ - "Data" - ], - "restrict": "false", - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", - "fileName": "file1.Bin", - "mimeType": "application/octet-stream", - "checksum": { - "@type": "SHA-1", - "@value": "123456" - } - } - }, - { - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", - "successMessage": "Replaced successfully in the dataset", - "fileDetails": { - "description": "My description.", - "label": "file2.txt", - "restricted": false, - "directoryLabel": "data/subdir1", - "categories": [ - "Data" - ], - "dataFile": { - "persistentId": "", - "pidURL": "", - "filename": "file2.txt", - "contentType": "text/plain", - "filesize": 2407, - "description": "My description.", - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", - "rootDataFileId": 11, - "previousDataFileId": 11, - "checksum": { - "type": "SHA-1", - "value": "123789" - } - } - } - } - ], - "Result": { - "Total number of files": 2, - "Number of files successfully replaced": 1 - } - } + "taskIdentifier":"b5fd01aa-8963-11ee-83ae-d5484943e99a" } + - -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. 
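As the globus-api documentation above notes, the transfer-completion steps are asynchronous, so a client confirms success by watching the dataset lock state and contents. A minimal polling sketch, assuming the standard dataset locks endpoint, an illustrative DATASET_ID, and that an empty lock list means Dataverse has finished processing (worth verifying against your installation):

.. code-block:: bash

  export SERVER_URL=https://demo.dataverse.org
  export DATASET_ID=24

  # Poll until no locks remain on the dataset; the locks API returns
  # {"status":"OK","data":[...]} where each entry carries a "lockType".
  while curl -s "$SERVER_URL/api/datasets/$DATASET_ID/locks" | grep -q '"lockType"'; do
    echo "dataset still locked, waiting..."
    sleep 10
  done
  echo "no locks remain; check the dataset file listing for the transferred files"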
From c7d73f64177745fa7892543407025f9130dcb83b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:25:22 -0500 Subject: [PATCH 317/414] default for globus-cache-maxage --- src/main/resources/META-INF/microprofile-config.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 504b5e46735..ec8427795ee 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -18,6 +18,7 @@ dataverse.build= dataverse.files.directory=${STORAGE_DIR:/tmp/dataverse} dataverse.files.uploads=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/uploads dataverse.files.docroot=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/docroot +dataverse.files.globus-cache-maxage=5 # SEARCH INDEX dataverse.solr.host=localhost From 1fb7ddf6d89a1b36f9a059f016ac617aa6ec3758 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:27:42 -0500 Subject: [PATCH 318/414] fix spacing --- doc/sphinx-guides/source/developers/globus-api.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 6a94f220dc2..5b2b6982866 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -105,6 +105,7 @@ Note that when using the dataverse-globus app or the return from the previous ca In the managed case, the JSON body sent must include the id of the Globus user that will perform the transfer and the number of files that will be transferred: .. code-block:: bash + { "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", "numberOfFiles":2 @@ -113,6 +114,7 @@ In the managed case, the JSON body sent must include the id of the Globus user t In the remote reference case, the JSON body sent must include the Globus endpoint/paths that will be referenced: .. code-block:: bash + { "referencedFiles":[ "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" @@ -120,6 +122,7 @@ In the remote reference case, the JSON body sent must include the Globus endpoin } The response will include a JSON object. In the managed case, the map is from new assigned file storageidentifiers and specific paths on the managed Globus endpoint: + .. code-block:: bash { @@ -161,7 +164,6 @@ In the managed case, once a Globus transfer has been initiated a final API call "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. 
@@ -191,6 +193,7 @@ To begin downloading files, the requestGlobusDownload URL is used: The JSON body sent should include a list of file ids to download and, for a managed endpoint, the Globus principal that will make the transfer: .. code-block:: bash + { "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", "fileIds":[60, 61] From 6fee16dec8125390ea6aa7221a19fde0db2b9730 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 6 Dec 2023 11:52:24 -0500 Subject: [PATCH 319/414] #10151 incorporate json schema --- doc/release-notes/6.1-release-notes.md | 6 +++++- doc/release-notes/9464-json-validation.md | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) delete mode 100644 doc/release-notes/9464-json-validation.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 06a3e01f7af..990ba219cad 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -24,7 +24,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] +For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -45,6 +45,8 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. +- getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. +- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -112,6 +114,8 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. +- Functionality has been added to help validate dataset JSON prior to dataset creation. 
There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) + ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md deleted file mode 100644 index f104263ba35..00000000000 --- a/doc/release-notes/9464-json-validation.md +++ /dev/null @@ -1,3 +0,0 @@ -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) - -For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From 15e80aa4c847cb5ce8574fe600723c9cc81a5bc2 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 16:56:37 +0000 Subject: [PATCH 320/414] Fixed: roleAssignees setup in canDownloadAtLeastOneFile --- .../edu/harvard/iq/dataverse/PermissionServiceBean.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 6dc943f1ca8..471cac31e77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -849,7 +849,8 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio * @return boolean indicating whether the user can download at least one file or not */ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { - if (dataverseRequest.getUser().isSuperuser()) { + User user = dataverseRequest.getUser(); + if (user.isSuperuser()) { return true; } // This is a shortcut to avoid having to check version files if the condition is met @@ -859,8 +860,9 @@ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, Data List fileMetadatas = datasetVersion.getFileMetadatas(); for (FileMetadata fileMetadata : fileMetadatas) { DataFile dataFile = fileMetadata.getDataFile(); - Set ras = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); - if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { + Set roleAssignees = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); + roleAssignees.add(user); + if (hasGroupPermissionsFor(roleAssignees, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } } From 4b71b36305fb6c18f7282530dc4491976a352936 
Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 17:02:07 +0000 Subject: [PATCH 321/414] Added: IT for getCanDownloadAtLeastOneFile endpoint --- .../harvard/iq/dataverse/api/DatasetsIT.java | 71 +++++++++++++++---- 1 file changed, 58 insertions(+), 13 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 945b741a94b..3510f2c06ef 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -80,7 +80,6 @@ import javax.xml.stream.XMLStreamReader; import static java.lang.Thread.sleep; -import static org.junit.jupiter.api.Assertions.assertEquals; import org.hamcrest.CoreMatchers; @@ -90,11 +89,7 @@ import static org.hamcrest.CoreMatchers.startsWith; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.contains; - -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.*; public class DatasetsIT { @@ -4123,10 +4118,10 @@ public void testGetUserPermissionsOnDataset() { } @Test - public void testGetCanDownloadAtLeastOneFile() { - Response createUser = UtilIT.createRandomUser(); - createUser.then().assertThat().statusCode(OK.getStatusCode()); - String apiToken = UtilIT.getApiTokenFromResponse(createUser); + public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { + Response createUserResponse = UtilIT.createRandomUser(); + createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); @@ -4135,15 +4130,65 @@ public void testGetCanDownloadAtLeastOneFile() { Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String datasetPersistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); - // Call with valid dataset id - Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, apiToken); + // Upload file + String pathToTestFile = "src/test/resources/images/coffeeshop.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), pathToTestFile, Json.createObjectBuilder().build(), apiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + + String fileId = JsonPath.from(uploadResponse.body().asString()).getString("data.files[0].dataFile.id"); + + // Publish dataset version + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Make sure the dataset is published + Thread.sleep(3000); + + // Create a second user to call 
the getCanDownloadAtLeastOneFile method + Response createSecondUserResponse = UtilIT.createRandomUser(); + createSecondUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + String secondUserApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse); + String secondUserUsername = UtilIT.getUsernameFromResponse(createSecondUserResponse); + + // Call with a valid dataset id when a file is released + Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); boolean canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); assertTrue(canDownloadAtLeastOneFile); + // Restrict file + Response restrictFileResponse = UtilIT.restrictFile(fileId, true, apiToken); + restrictFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Publish dataset version + publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Make sure the dataset is published + Thread.sleep(3000); + + // Call with a valid dataset id when a file is restricted and the user does not have access + canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertFalse(canDownloadAtLeastOneFile); + + // Grant restricted file access to the user + Response grantFileAccessResponse = UtilIT.grantFileAccess(fileId, "@" + secondUserUsername, apiToken); + grantFileAccessResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Call with a valid dataset id when a file is restricted and the user has access + canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertTrue(canDownloadAtLeastOneFile); + // Call with invalid dataset id - Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, apiToken); + Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, secondUserApiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } } From 6d2f87ca93c108a9b4ec4905372a2e1709b3f5cf Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:24:26 -0500 Subject: [PATCH 322/414] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 990ba219cad..4b5c20f3953 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -12,8 +12,8 @@ This release contains major upgrades to core components. 
Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios ### Dataverse installation can be now be configured to allow out-of-band upload -- Installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. ### Alternative Title is made repeatable. @@ -23,7 +23,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. +Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API @@ -70,7 +70,6 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. 
- ### DataFile API payload has been extended to include the following fields: - tabularData: Boolean field to know if the DataFile is of tabular type - fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) @@ -114,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -125,12 +124,13 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. +- `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. 
## OpenID Connect Authentication Provider Improvements @@ -175,6 +175,8 @@ As part of these testing improvements, the code coverage report file for unit te - dataverse.auth.oidc.subtitle - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age +- dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.guestbook-at-request ## Installation @@ -182,14 +184,17 @@ If this is a new installation, please follow our [Installation Guide](https://gu Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! -You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). ## Upgrade Instructions - Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +## Backward Incompatibilities +- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, + ## Complete List of Changes For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. From 90ff56ca979cd71f1c467ff1cfa0dfeb8f619691 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:43:43 -0500 Subject: [PATCH 323/414] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 4b5c20f3953..e1a9214a982 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -122,7 +122,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. 
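The getDatasetJsonSchema and validateDatasetJson endpoints mentioned in the release notes above can be exercised with two simple calls. A sketch, where the collection alias myCollection and the local file dataset.json are illustrative:

.. code-block:: bash

  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
  export SERVER_URL=https://demo.dataverse.org

  # Retrieve the custom schema built from the collection's required fields...
  curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/myCollection/datasetSchema"

  # ...then validate a candidate dataset JSON file against it.
  curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST \
    "$SERVER_URL/api/dataverses/myCollection/validateDatasetJson" --upload-file dataset.json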
From 10e0e25fe10dda9f49b6126f591b9483adb2f765 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:44:49 -0500 Subject: [PATCH 324/414] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index e1a9214a982..427a07a4c2c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -123,7 +123,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. From 3d55ed31de8fb9e45a2cedfecf07e22c82dae12a Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:47:53 -0500 Subject: [PATCH 325/414] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 427a07a4c2c..189f21f2322 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -19,21 +19,22 @@ With the upload-out-of-band option enabled, it is also possible for file upload ### Alternative Title is made repeatable. - One will need to update database with updated citation block. `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` - Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +- One will also need to update Solr schema: + Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` + Reload Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. 
+Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] -### Improvements in the /versions API +### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. -### The following API endpoints have been added: +### The following API endpoints have been added: +- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - /api/files/{id}/downloadCount - /api/files/{id}/dataTables - /api/files/{id}/metadata/tabularTags New endpoint to set tabular file tags. @@ -42,11 +43,10 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. - userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. -- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. +- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. 
Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -113,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -192,7 +192,7 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1be5d4b6b2baddc5f30bf598d81bd5ed991f73ee Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:52:39 -0500 Subject: [PATCH 326/414] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 189f21f2322..d0fe895565c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -87,10 +87,8 @@ This parameter applies a filter criteria to the operation and supports the follo ### Misc - Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. - - Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). 
The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. - - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -104,7 +102,7 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. - +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information @@ -124,7 +122,6 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
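The corrected Signposting output discussed in the release notes above can be inspected directly. A sketch with an illustrative persistent identifier; the linkset path follows the native API guide and is worth double-checking there:

.. code-block:: bash

  export SERVER_URL=https://demo.dataverse.org
  export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV

  # Level 1: machine-readable Link headers on the dataset landing page
  curl -s -I "$SERVER_URL/dataset.xhtml?persistentId=$PERSISTENT_IDENTIFIER" | grep -i '^link:'

  # Level 2: the linkset document, now returned without the {"status":"OK","data":...} wrapper
  curl -s "$SERVER_URL/api/datasets/:persistentId/versions/:latest-published/linkset?persistentId=$PERSISTENT_IDENTIFIER"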
From 8e2ff826bdd0f41e598a56012fa780d5f9148a2e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 13:41:35 -0500 Subject: [PATCH 327/414] store tests --- .../dataaccess/GlobusOverlayAccessIOTest.java | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index e69de29bb2d..792a9974076 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -0,0 +1,148 @@ +/* + * SPDX-License-Identifier: Apache 2.0 + */ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DOIServiceBean; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import static org.junit.jupiter.api.Assertions.*; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import java.io.IOException; +import java.nio.file.Paths; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.STRICT_STUBS) +public class GlobusOverlayAccessIOTest { + + @Mock + + private Dataset dataset; + private DataFile mDatafile; + private DataFile rDatafile; + private String baseStoreId1 = "182ad2bda2f-c3508e719076"; + private String baseStoreId2 = "182ad2bda2f-c3508e719077"; + private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc"; + private String authority = "10.5072"; + private String identifier = "F2ABCDEF"; + + @BeforeEach + public void setUp() { + // Base Store + System.setProperty("dataverse.files.base.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + System.setProperty("dataverse.files.base.label", "default"); + System.setProperty("dataverse.files.base.directory", "/tmp/files"); + + // Managed Globus Store + + // Nonsense endpoint/paths + System.setProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + // Nonsense value of the right form + System.setProperty("dataverse.files.globusm.globus-token", + "NzM2NTQxMDMtOTg1Yy00NDgzLWE1MTYtYTJlNDk0ZmI3MDhkOkpJZGZaZGxMZStQNUo3MTRIMDY2cDh6YzIrOXI2RmMrbFR6UG0zcSsycjA9"); + System.setProperty("dataverse.files.globusm.remote-store-name", "GlobusEndpoint1"); + System.setProperty("dataverse.files.globusm.type", "globus"); + System.setProperty("dataverse.files.globusm.managed", "true"); + System.setProperty("dataverse.files.globusm.base-store", "base"); + System.setProperty("dataverse.files.globusm.label", "globusManaged"); + + // Remote Store + System.setProperty("dataverse.files.globusr.type", "globus"); + System.setProperty("dataverse.files.globusr.base-store", "base"); + System.setProperty("dataverse.files.globusr.managed", "false"); + System.setProperty("dataverse.files.globusm.label", "globusRemote"); + System.setProperty( + "dataverse.files.globusr." 
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp"); + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.base.type"); + System.clearProperty("dataverse.files.base.label"); + System.clearProperty("dataverse.files.base.directory"); + System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + System.clearProperty("dataverse.files.globusm.globus-token"); + System.clearProperty("dataverse.files.globusm.remote-store-name"); + System.clearProperty("dataverse.files.globusm.type"); + System.clearProperty("dataverse.files.globusm.managed"); + System.clearProperty("dataverse.files.globusm.base-store"); + System.clearProperty("dataverse.files.globusm.label"); + System.clearProperty("dataverse.files.globusr.type"); + System.clearProperty("dataverse.files.globusr.base-store"); + System.clearProperty("dataverse.files.globusr.managed"); + System.clearProperty("dataverse.files.globusm.label"); + System.clearProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + System.clearProperty("dataverse.files.globusr.remote-store-name"); + } + + @Test + void testGlobusOverlayIdentifiers() throws IOException { + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusm", mDatafile.getStorageIdentifier())); + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusr", rDatafile.getStorageIdentifier())); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusm", "globusr://localid//../of/the/hill")); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusr", + rDatafile.getStorageIdentifier().replace("hdc1", ""))); + + // We can read the storageIdentifier and get the driver + assertTrue(mDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(mDatafile.getStorageIdentifier()))); + assertTrue(rDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(rDatafile.getStorageIdentifier()))); + + // We can get the driver type from its ID + assertTrue(DataAccess.getDriverType("globusm").equals(System.getProperty("dataverse.files.globusm.type"))); + assertTrue(DataAccess.getDriverType("globusr").equals(System.getProperty("dataverse.files.globusr.type"))); + + // When we get a StorageIO for the file, it is the right type + StorageIO<DataFile> mStorageIO = DataAccess.getStorageIO(mDatafile); + assertTrue(mStorageIO instanceof GlobusOverlayAccessIO); + StorageIO<DataFile> rStorageIO = DataAccess.getStorageIO(rDatafile); + assertTrue(rStorageIO instanceof GlobusOverlayAccessIO); + + // When we use it, we can get properties like the remote store name + assertTrue(mStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusm.remote-store-name"))); + assertTrue(rStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusr.remote-store-name"))); + + // Storage Locations are correct + String
mLocation = mStorageIO.getStorageLocation(); + assertEquals("globusm:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + + "/" + baseStoreId1, mLocation); + String rLocation = rStorageIO.getStorageLocation(); + assertEquals("globusr://" + baseStoreId2 + "//" + logoPath, rLocation); + + // If we ask for the path for an aux file, it is correct + System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, + identifier, baseStoreId1 + ".auxobject").toString()); + System.out.println(mStorageIO.getAuxObjectAsPath("auxobject").toString()); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId1 + ".auxobject").equals(mStorageIO.getAuxObjectAsPath("auxobject"))); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId2 + ".auxobject").equals(rStorageIO.getAuxObjectAsPath("auxobject"))); + } +} From 865c9feb4230a0a3bc9880cb6088a563b3fe21fc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 13:53:35 -0500 Subject: [PATCH 328/414] getConfig tests --- .../iq/dataverse/dataaccess/StorageIOTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java index 2ed9d18036d..84a241b90f6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java @@ -243,4 +243,16 @@ public void testGenerateVariableHeader() { assertEquals("Random Random\n", instance.generateVariableHeader(dvs)); assertEquals(null, instance.generateVariableHeader(null)); } + + @Test + public void testGetConfigParam() { + System.setProperty("dataverse.files.globus.type", "globus"); + assertEquals("globus", StorageIO.getConfigParamForDriver("globus", StorageIO.TYPE)); + System.clearProperty("dataverse.files.globus.type"); + } + + @Test + public void testGetConfigParamWithDefault() { + assertEquals(DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER, StorageIO.getConfigParamForDriver("globus", AbstractRemoteOverlayAccessIO.BASE_STORE, DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER)); + } } From cb1beaae490126c2274219dfcb4cae56094b096a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 14:11:15 -0500 Subject: [PATCH 329/414] finish changing minio secret key #6783 This should have been part of 811d79a7 --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index e68215d53d2..5265a6b7c2d 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -42,7 +42,7 @@ services: -Ddataverse.files.minio1.upload-redirect=false -Ddataverse.files.minio1.download-redirect=false -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k35 + -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) From 5b7a560a380db12d083e82a19a865eb79559e0a4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 14:41:31 -0500 Subject: [PATCH 330/414] refactor, test for getFileMap --- .../harvard/iq/dataverse/api/Datasets.java | 3 +- .../AbstractRemoteOverlayAccessIO.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 134 +++++++++--------- 
.../iq/dataverse/globus/GlobusUtil.java | 33 +++++ .../dataaccess/GlobusOverlayAccessIOTest.java | 1 - .../iq/dataverse/globus/GlobusUtilTest.java | 88 ++++++++++++ 6 files changed, 190 insertions(+), 71 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 939ebf1dcd4..b3bfc476423 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -110,6 +110,7 @@ import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import java.io.IOException; import java.io.InputStream; @@ -3996,7 +3997,7 @@ public Response requestGlobusDownload(@Context ContainerRequestContext crc, @Pat } } // Allowed to download all requested files - JsonObject files = globusService.getFilesMap(dataFiles, dataset); + JsonObject files = GlobusUtil.getFilesMap(dataFiles, dataset); if (GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { // If managed, give the principal read permissions int status = globusService.setPermissionForDownload(dataset, body.getString("principal")); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 8d058b7c9e3..6c26502acfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -42,7 +42,7 @@ public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - protected static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + public static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; static final String BASE_STORE = "base-store"; protected static final String SECRET_KEY = "secret-key"; static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 37959188857..8cc8e491416 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -159,9 +159,11 @@ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger } } - /** Request read/write access for the specified principal and generate a list of accessible paths for new files for the specified dataset. + /** + * Request read/write access for the specified principal and generate a list of + * accessible paths for new files for the specified dataset. 
* - * @param principal - the id of the Globus principal doing the transfer + * @param principal - the id of the Globus principal doing the transfer * @param dataset * @param numberOfPaths - how many files are to be transferred * @return @@ -230,10 +232,15 @@ private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissi } } - /** Given an array of remote files to be referenced in the dataset, create a set of valid storage identifiers and return a map of the remote file paths to storage identifiers. + /** + * Given an array of remote files to be referenced in the dataset, create a set + * of valid storage identifiers and return a map of the remote file paths to + * storage identifiers. * * @param dataset - * @param referencedFiles - a JSON array of remote files to be referenced in the dataset - each should be a string with the /path/to/file + * @param referencedFiles - a JSON array of remote files to be referenced in the + * dataset - each should be a string with the /path/to/file * @return - a map of supplied paths to valid storage identifiers */ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { @@ -262,15 +269,17 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref return fileMap.build(); } - - /** A cache of temporary permission requests - for upload (rw) and download (r) access. - * When a temporary permission request is created, it is added to the cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, the permission will be revoked/deleted. - * (If a transfer has been started, the permission will not be revoked/deleted until the transfer is complete. This is handled in other methods.) + /** + * A cache of temporary permission requests - for upload (rw) and download (r) + * access. When a temporary permission request is created, it is added to the + * cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, + * the permission will be revoked/deleted. (If a transfer has been started, the + * permission will not be revoked/deleted until the transfer is complete. This + * is handled in other methods.) */ // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() - .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { // Delete rules that expire logger.fine("Rule " + ruleId + " expired"); @@ -280,20 +289,24 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref .build(); - //Convenience method to add a temporary permission request to the cache - allows logging of temporary permission requests + // Convenience method to add a temporary permission request to the cache - + // allows logging of temporary permission requests private void monitorTemporaryPermissions(String ruleId, long datasetId) { logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); rulesCache.put(ruleId, datasetId); } -/** Call the Globus API to get info about the transfer. 
- * - * @param accessToken - * @param taskId - the Globus task id supplied by the user - * @param globusLogger - the transaction-specific logger to use (separate log files are created in general, some calls may use the class logger) - * @return - * @throws MalformedURLException - */ + /** + * Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log + * files are created in general, some calls may use the + * class logger) + * @return + * @throws MalformedURLException + */ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); @@ -313,9 +326,12 @@ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger return task; } - /** Globus call to get an access token for the user using the long-term token we hold. + /** + * Globus call to get an access token for the user using the long-term token we + * hold. * - * @param globusBasicToken - the base64 encoded Globus Basic token comprised of the : + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of + * the : * @return - a valid Globus access token */ public static AccessToken getClientToken(String globusBasicToken) { @@ -433,7 +449,6 @@ static class MakeRequestResponse { } - /** * Cache of open download Requests This cache keeps track of the set of files * selected for transfer out (download) via Globus. It is a means of @@ -480,10 +495,11 @@ public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - /** Generated the App URl for upload (in) or download (out) + /** + * Generated the App URl for upload (in) or download (out) * - * @param d - the dataset involved - * @param upload - boolean, true for upload, false for download + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download * @param dataFiles - a list of the DataFiles to be downloaded * @return */ @@ -516,7 +532,7 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List downloadDFList) { return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); - } @Asynchronous @@ -608,8 +605,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S rulesCache.invalidate(ruleId); } } - - //Wait before first check + + // Wait before first check Thread.sleep(5000); // globus task status check task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); @@ -907,8 +904,8 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - - //Transfer is done (success or failure) so delete the rule + + // Transfer is done (success or failure) so delete the rule if (ruleId != null) { logger.info("Deleting: rule: " + ruleId); deletePermission(ruleId, dataset, globusLogger); @@ -1150,13 +1147,14 @@ private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { return endpoint; } - + // This helper method is called from the Download terms/guestbook/etc. popup, // when the user clicks the "ok" button. 
We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. - public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, boolean doNotSaveGuestbookResponse) { + public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, + boolean doNotSaveGuestbookResponse) { PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); @@ -1170,7 +1168,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, apiToken = new ApiToken(); apiToken.setTokenString(privUrl.getToken()); } - + DataFile df = guestbookResponse.getDataFile(); if (df != null) { logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); @@ -1179,35 +1177,35 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, if (!doNotSaveGuestbookResponse) { fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); } - PrimeFaces.current() - .executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); } else { - //Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + // Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); List selectedFiles = new ArrayList(); for (String idAsString : list) { try { Long fileId = Long.parseLong(idAsString); - // If we need to create a GuestBookResponse record, we have to - // look up the DataFile object for this file: - if (!doNotSaveGuestbookResponse) { - df = dataFileService.findCheapAndEasy(fileId); - guestbookResponse.setDataFile(df); - fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); - selectedFiles.add(df); - } + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + if (!doNotSaveGuestbookResponse) { + df = dataFileService.findCheapAndEasy(fileId); + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + selectedFiles.add(df); + } } catch (NumberFormatException nfe) { - logger.warning("A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + idAsString); + logger.warning( + "A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + + idAsString); return; } } if (!selectedFiles.isEmpty()) { - //Use dataset from one file - files should all be from the same dataset - PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, - selectedFiles)); + // Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, selectedFiles)); } } - } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java new file mode 100644 index 00000000000..92cf8ac7704 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.List; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; 
+import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +public class GlobusUtil { + + public static JsonObject getFilesMap(List dataFiles, Dataset d) { + JsonObjectBuilder filesBuilder = Json.createObjectBuilder(); + for (DataFile df : dataFiles) { + String storageId = df.getStorageIdentifier(); + String[] parts = DataAccess + .getDriverIdAndStorageLocation(DataAccess.getLocationFromStorageId(storageId, d)); + String driverId = parts[0]; + String fileLocation = parts[1]; + if (GlobusAccessibleStore.isDataverseManaged(driverId)) { + String endpointWithBasePath = GlobusAccessibleStore.getTransferEnpointWithPath(driverId); + fileLocation = endpointWithBasePath + "/" + fileLocation; + } else { + fileLocation = storageId.substring(storageId.lastIndexOf("//") + 2); + } + filesBuilder.add(df.getId().toString(), fileLocation); + } + return filesBuilder.build(); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 792a9974076..856d71d7dc0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -6,7 +6,6 @@ import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.mocks.MocksFactory; import org.junit.jupiter.api.AfterEach; diff --git a/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java new file mode 100644 index 00000000000..56f8731b9c8 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java @@ -0,0 +1,88 @@ +package edu.harvard.iq.dataverse.globus; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.mock; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +import edu.harvard.iq.dataverse.DOIServiceBean; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonObject; + +public class GlobusUtilTest { + + private Dataset dataset; + private DataFile mDatafile; + private DataFile rDatafile; + private String baseStoreId1 = "182ad2bda2f-c3508e719076"; + private String baseStoreId2 = "182ad2bda2f-c3508e719077"; + private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc"; + private String authority = "10.5072"; + private String identifier = "F2ABCDEF"; + + @BeforeEach + public void setUp() { + + // Managed Globus Store + + // Nonsense endpoint/paths + System.setProperty("dataverse.files.globusm." 
+ GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + System.setProperty("dataverse.files.globusm.managed", "true"); + + // Remote Store + System.setProperty("dataverse.files.globusr.managed", "false"); + System.setProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + List<DataFile> files = new ArrayList<DataFile>(); + files.add(mDatafile); + files.add(rDatafile); + dataset.setFiles(files); + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + System.clearProperty("dataverse.files.globusm.managed"); + System.clearProperty("dataverse.files.globusr.managed"); + System.clearProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + } + + + @Test + public void testgetFilesMap() { + + JsonObject jo = GlobusUtil.getFilesMap(dataset.getFiles(), dataset); + System.out.println(JsonUtil.prettyPrint(jo)); + assertEquals(jo.getString(Long.toString(mDatafile.getId())), "d7c42580-6538-4605-9ad8-116a61982644/hdc1/10.5072/F2ABCDEF/182ad2bda2f-c3508e719076"); + assertEquals(jo.getString(Long.toString(rDatafile.getId())), logoPath); + } +} From 4ba629d643678acdd0b649128b8a76a805ee6906 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 15:28:32 -0500 Subject: [PATCH 331/414] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index d0fe895565c..38b99e6580b 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -11,6 +11,10 @@ This release contains major upgrades to core components. Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios +### Optional support for guestbooks to appear when file access is requested rather than after access has been granted and a download is started +Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). + The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + ### Dataverse installation can now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003).
This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via the UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). @@ -142,7 +146,7 @@ life easier during instance setups and reconfiguration. You no longer need to ge necessary JSON file. ### Adding PKCE Support - +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273). Some OIDC providers require using PKCE as an additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) From 93d9b35a07625622523a4490eee8f55d617defec Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:32:17 -0500 Subject: [PATCH 332/414] future test code - requires config of Globus stores --- .../harvard/iq/dataverse/api/DatasetsIT.java | 53 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 32 +++++++++++ .../dataaccess/GlobusOverlayAccessIOTest.java | 34 ++++++------ 3 files changed, 104 insertions(+), 15 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 6a746b7c5b5..928574eb82b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -42,6 +42,9 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIOTest; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import org.apache.commons.lang3.StringUtils; @@ -135,6 +138,7 @@ public static void setUpClass() { .statusCode(200); */ } + @AfterAll public static void afterClass() { @@ -4175,4 +4179,53 @@ public void testGetUserPermissionsOnDataset() { Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getUserPermissionsOnDataset("testInvalidId", apiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } + + //Requires that a Globus remote store be set up with the parameters in the GlobusOverlayAccessIOTest class + //Tests whether the API call succeeds and has some of the expected parameters + @Test + @Disabled + public void testGetGlobusUploadParameters() { + //Creates managed and remote Globus stores + GlobusOverlayAccessIOTest.setUp(); + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); + + Response setDriver = UtilIT.setDatasetStorageDriver(datasetId, System.getProperty("dataverse.files.globusr.label"), apiToken); + assertEquals(200, setDriver.getStatusCode()); + + Response getUploadParams = UtilIT.getDatasetGlobusUploadParameters(datasetId, "en_us", apiToken); + assertEquals(200, getUploadParams.getStatusCode()); + JsonObject data = JsonUtil.getJsonObject(getUploadParams.getBody().asString()); + JsonObject queryParams = data.getJsonObject("queryParameters"); + assertEquals("en_us", queryParams.getString("dvLocale")); + assertEquals("false", queryParams.getString("managed")); + //Assumes only one reference endpoint with a basepath is configured + assertTrue(queryParams.getJsonArray("referenceEndpointsWithPaths").get(0).toString().indexOf(System.getProperty("dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS)) > -1); + JsonArray signedUrls = data.getJsonArray("signedUrls"); + boolean found = false; + for (int i = 0; i < signedUrls.size(); i++) { + JsonObject signedUrl = signedUrls.getJsonObject(i); + if (signedUrl.getString("name").equals("requestGlobusReferencePaths")) { + found=true; + break; + } + } + assertTrue(found); + //Removes managed and remote Globus stores + GlobusOverlayAccessIOTest.tearDown(); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 869e755a183..bd2fe7e6f0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3636,4 +3636,36 @@ static Response downloadTmpFile(String fullyQualifiedPathToFile, String apiToken .get("/api/admin/downloadTmpFile?fullyQualifiedPathToFile=" + fullyQualifiedPathToFile); } + static Response setDatasetStorageDriver(Integer datasetId, String driverLabel, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(driverLabel) + .put("/api/datasets/" + datasetId + "/storageDriver"); + } + + + //Globus Store related - not currently used + + static Response getDatasetGlobusUploadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusUploadParameters?locale=" + locale); + } + + static Response getDatasetGlobusDownloadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusDownloadParameters?locale=" + locale); + } + + static Response requestGlobusDownload(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusDownload"); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 856d71d7dc0..1c84fa90a9e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -8,8 +8,9 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.mocks.MocksFactory; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import static org.junit.jupiter.api.Assertions.*; @@ -35,8 +36,8 @@ public class GlobusOverlayAccessIOTest { private String authority = "10.5072"; private String identifier = "F2ABCDEF"; - @BeforeEach - public void setUp() { + @BeforeAll + public static void setUp() { // Base Store System.setProperty("dataverse.files.base.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.base.label", "default"); @@ -65,20 +66,11 @@ public void setUp() { "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp"); - dataset = MocksFactory.makeDataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", - DOIServiceBean.DOI_RESOLVER_URL, null)); - mDatafile = MocksFactory.makeDataFile(); - mDatafile.setOwner(dataset); - mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); - rDatafile = MocksFactory.makeDataFile(); - rDatafile.setOwner(dataset); - rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); } - @AfterEach - public void tearDown() { + @AfterAll + public static void tearDown() { System.clearProperty("dataverse.files.base.type"); System.clearProperty("dataverse.files.base.label"); System.clearProperty("dataverse.files.base.directory"); @@ -100,6 +92,18 @@ public void tearDown() { @Test void testGlobusOverlayIdentifiers() throws IOException { + + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusm", mDatafile.getStorageIdentifier())); assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusr", rDatafile.getStorageIdentifier())); assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusm", "globusr://localid//../of/the/hill")); From 12b7c306dd31ebd987a2bae5f36dae27e4f0ba56 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:32:24 -0500 Subject: [PATCH 333/414] typo --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 1c84fa90a9e..ad980aa28cd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -61,7 +61,7 @@ public static void setUp() { System.setProperty("dataverse.files.globusr.type", "globus"); 
System.setProperty("dataverse.files.globusr.base-store", "base"); System.setProperty("dataverse.files.globusr.managed", "false"); - System.setProperty("dataverse.files.globusm.label", "globusRemote"); + System.setProperty("dataverse.files.globusr.label", "globusRemote"); System.setProperty( "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); From 1426dfb6fc52ace869e3c822a732d5b408ca7c4c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:47:54 -0500 Subject: [PATCH 334/414] add missing setting to release notes, add a todo to use two delays --- doc/release-notes/10162-globus-support.md | 7 ++++++- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md index d64e72b70a1..7bc3990f840 100644 --- a/doc/release-notes/10162-globus-support.md +++ b/doc/release-notes/10162-globus-support.md @@ -1,4 +1,6 @@ -Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, +and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. +Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) - Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. - The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model - Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) @@ -10,5 +12,8 @@ Backward Incompatibilities: New JVM Options: - A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. 
+ + + Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 8cc8e491416..d0660a55a6a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -277,6 +277,8 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref * permission will not be revoked/deleted until the transfer is complete. This * is handled in other methods.) */ + // ToDo - nominally this doesn't need to be as long as the allowed time for the + // downloadCache so there could be two separate settings. // Single cache of open rules/permission requests private final Cache<String, Long> rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) From d2427bd39046f104c95e27d1869d1665b969724f Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 09:49:52 -0500 Subject: [PATCH 335/414] #10151 incorporate recent additions --- doc/release-notes/6.1-release-notes.md | 22 +++++++++++++++++++ doc/release-notes/8549-collection-quotas.md | 3 --- doc/release-notes/8760-bagit.md | 15 -------------- 3 files changed, 22 insertions(+), 18 deletions(-) delete mode 100644 doc/release-notes/8549-collection-quotas.md delete mode 100644 doc/release-notes/8760-bagit.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38b99e6580b..38a7a1064e6 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -30,6 +30,28 @@ With the upload-out-of-band option enabled, it is also possible for file upload Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since the value of alternative title has changed from a simple string to an array. For example, instead of "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"]
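A minimal sketch of the Alternative Title change just described, using the Jakarta JSON API; the class name and builder calls are illustrative and only contrast the two payload shapes:

```java
import jakarta.json.Json;
import jakarta.json.JsonObject;

public class AlternativeTitleShapes {
    public static void main(String[] args) {
        // Old, pre-6.1 shape: a single string value.
        JsonObject oldShape = Json.createObjectBuilder()
                .add("value", "Alternative Title")
                .build();

        // New shape: the field is repeatable, so the value is an array.
        JsonObject newShape = Json.createObjectBuilder()
                .add("value", Json.createArrayBuilder()
                        .add("Alternative Title1")
                        .add("Alternative Title2"))
                .build();

        System.out.println("old: " + oldShape); // {"value":"Alternative Title"}
        System.out.println("new: " + newShape); // {"value":["Alternative Title1","Alternative Title2"]}
    }
}
```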
+ +For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt + + ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output diff --git a/doc/release-notes/8549-collection-quotas.md b/doc/release-notes/8549-collection-quotas.md deleted file mode 100644 index b3635d0c5a1..00000000000 --- a/doc/release-notes/8549-collection-quotas.md +++ /dev/null @@ -1,3 +0,0 @@ -This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 - diff --git a/doc/release-notes/8760-bagit.md b/doc/release-notes/8760-bagit.md deleted file mode 100644 index 30601857309..00000000000 --- a/doc/release-notes/8760-bagit.md +++ /dev/null @@ -1,15 +0,0 @@ -For BagIT export, it is now possible to configure the following information in bag-info.txt... - -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu - -... using new JVM/MPCONFIG options: - -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email - -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. - -For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt From 05c53066ea26c809b6376051ff336f11a4bcee9d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:29:47 -0500 Subject: [PATCH 336/414] mention download tmp file API #10151 --- doc/release-notes/6.1-release-notes.md | 1 + doc/release-notes/8760-download-tmp-file.md | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 doc/release-notes/8760-download-tmp-file.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..1b4e884cded 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -73,6 +73,7 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. 
+- downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. diff --git a/doc/release-notes/8760-download-tmp-file.md b/doc/release-notes/8760-download-tmp-file.md deleted file mode 100644 index 7623a91ac9a..00000000000 --- a/doc/release-notes/8760-download-tmp-file.md +++ /dev/null @@ -1,3 +0,0 @@ -A new API has been added for testing purposes that allows files to be downloaded from /tmp. - -See From 97c33218fa7224c544657e72f52c27d9cd8951bf Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:30:23 -0500 Subject: [PATCH 337/414] remove duplicate "new" heading in API changelog #10151 --- doc/sphinx-guides/source/api/changelog.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index d2908533a14..910134e14f3 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -12,9 +12,6 @@ New ~~~ - **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. - **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. - -New -~~~ - **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`. - **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. From 3a13ac8c56385ed2cc82bcc9db4f57fea7688a67 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:34:21 -0500 Subject: [PATCH 338/414] #10151 add upgrade instructions --- doc/release-notes/6.1-release-notes.md | 81 +++++++++++++++++++ .../9002_allow_direct_upload_setting.md | 5 -- 2 files changed, 81 insertions(+), 5 deletions(-) delete mode 100644 doc/release-notes/9002_allow_direct_upload_setting.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..d5972338124 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,6 +51,13 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt +### Direct Upload setting added +A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. + +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
+ +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. + ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -138,6 +145,7 @@ to generate updated versions. See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -214,6 +222,79 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +0\. These instructions assume that you are upgrading from 6.0. If you are running an earlier version, the only safe way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to 6.1. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 6 is installed in `/usr/local/payara6`. If not, adjust as needed. + +`export PAYARA=/usr/local/payara6` + +(or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) + +1\. Undeploy the previous version. + +- `$PAYARA/bin/asadmin undeploy dataverse-6.0` + +2\. Stop Payara and remove the generated directory + +- `service payara stop` +- `rm -rf $PAYARA/glassfish/domains/domain1/generated` + +3\. Start Payara + +- `service payara start` + +4\. Deploy this version. + +- `$PAYARA/bin/asadmin deploy dataverse-6.1.war` + +5\. Restart Payara + +- `service payara stop` +- `service payara start` + +6\.
Update Geospatial Metadata Block (to improve validation of bounding box values) + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` + +6a\. Update Citation Metadata Block (to make Alternative Title repeatable) + +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` + +7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). + +7a\. For installations without custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- Replace schema.xml + + - `cp /tmp/dvinstall/schema.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf` + +- Start Solr instance (usually `service solr start`, depending on Solr/OS) + +7b\. For installations with custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- There are 2 ways to regenerate the schema: Either by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed): ``` wget https://raw.githubusercontent.com/IQSS/dataverse/master/conf/solr/9.3.0/update-fields.sh chmod +x update-fields.sh curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml ``` OR, alternatively, you can edit the following line in your schema.xml by hand as follows (to indicate that alternative title is now `multiValued="true"`): ``` + <field name="alternativeTitle" type="text_en" multiValued="true" stored="true" indexed="true"/> + ``` + +- Restart Solr instance (usually `service solr restart` depending on solr/OS) + +8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). + + ## Backward Incompatibilities - Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, diff --git a/doc/release-notes/9002_allow_direct_upload_setting.md b/doc/release-notes/9002_allow_direct_upload_setting.md deleted file mode 100644 index 1e76ed4ad47..00000000000 --- a/doc/release-notes/9002_allow_direct_upload_setting.md +++ /dev/null @@ -1,5 +0,0 @@ -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. - -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server).
- -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. From a78213633e6f5bf345d1aedf4328eee5ee231ffb Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:43:40 -0500 Subject: [PATCH 339/414] #10151 remove notes previously incorporated --- .../9547-validation-for-geospatial-metadata.md | 9 --------- doc/release-notes/9859-ORE and Bag updates.md | 14 -------------- 2 files changed, 23 deletions(-) delete mode 100644 doc/release-notes/9547-validation-for-geospatial-metadata.md delete mode 100644 doc/release-notes/9859-ORE and Bag updates.md diff --git a/doc/release-notes/9547-validation-for-geospatial-metadata.md b/doc/release-notes/9547-validation-for-geospatial-metadata.md deleted file mode 100644 index a44e1a3732b..00000000000 --- a/doc/release-notes/9547-validation-for-geospatial-metadata.md +++ /dev/null @@ -1,9 +0,0 @@ -Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - -For the "upgrade" steps section: - -Update Geospatial Metadata Block - -- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` -- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` - diff --git a/doc/release-notes/9859-ORE and Bag updates.md b/doc/release-notes/9859-ORE and Bag updates.md deleted file mode 100644 index dd3ae3bbbe1..00000000000 --- a/doc/release-notes/9859-ORE and Bag updates.md +++ /dev/null @@ -1,14 +0,0 @@ -Dataverse's OAI_ORE Metadata Export format and archival BagIT exports -(which include the OAI-ORE metadata export file) have been updated to include -information about the dataset version state, e.g. RELEASED or DEACCESSIONED -and to indicate which version of Dataverse was used to create the archival Bag. -As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 -version designation and it is expected that any future changes to the OAI_ORE export -format will result in a version change and that tools such as DVUploader that can -recreate datasets from archival Bags will start indicating which version(s) of the -OAI_ORE format they can read. - -Dataverse installations that have been using archival Bags may wish to update any -existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) -to generate updated versions. 
\ No newline at end of file From b517f6e0fca1802faa4455522a72e711963714ba Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:53:07 -0500 Subject: [PATCH 340/414] #10151 S3 test notes --- doc/release-notes/6.1-release-notes.md | 2 ++ doc/release-notes/6783-s3-tests.md | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 doc/release-notes/6783-s3-tests.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 9a35a31a734..375717ab9c9 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -163,6 +163,8 @@ For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.ht - `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. +- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. ## OpenID Connect Authentication Provider Improvements diff --git a/doc/release-notes/6783-s3-tests.md b/doc/release-notes/6783-s3-tests.md deleted file mode 100644 index 1b9bb400cc6..00000000000 --- a/doc/release-notes/6783-s3-tests.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. - -In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. From 07a8659b60acdb766fb5a4742cf4ac4537e34615 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 8 Dec 2023 14:24:24 -0500 Subject: [PATCH 341/414] #10151 remove duplicate release note out of band setting previously added --- doc/release-notes/6.1-release-notes.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 375717ab9c9..b6bb7d8b806 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,14 +51,6 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt -### Direct Upload setting added -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. - -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. 
With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). - -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. - - ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output From ed5b0dbde90fd4b8592aa2bdce7ae205482063c8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Dec 2023 15:44:18 -0500 Subject: [PATCH 342/414] Apply suggestions from code review Co-authored-by: Philip Durbin --- doc/release-notes/10162-globus-support.md | 2 +- doc/sphinx-guides/source/developers/big-data-support.rst | 4 ++-- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md index 7bc3990f840..60670b5b101 100644 --- a/doc/release-notes/10162-globus-support.md +++ b/doc/release-notes/10162-globus-support.md @@ -1,7 +1,7 @@ Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) -- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. +- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. 
- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model - Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) - The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index fe49f9f6150..8d891e63317 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -149,7 +149,7 @@ Globus File Transfer Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. -Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) and a community-developed `dataverse-globus `_ app has been properly installed and configured. Globus endpoints can be in a variety of places, from data centers to personal computers. @@ -168,7 +168,7 @@ Dataverse-managed endpoints must be Globus 'guest collections' hosted on either S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). -With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionlity related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionlity normally associated with direct uploads to S3 is available.) +With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionality related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionality normally associated with direct uploads to S3 is available.) For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access). 
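For the remote/reference case just described, only a short store definition is needed on the Dataverse side. The following is a hedged sketch rather than an authoritative recipe: the store id `globusr` is hypothetical and the endpoint id and base path are placeholders, while the `type`, `label`, and `reference-endpoints-with-basepaths` option names come from the Globus store configuration table later in this series.
```
# Sketch only: register a hypothetical *remote* Globus store with id "globusr".
# Replace <endpoint-id>/<base-path> with a real trusted Globus endpoint and path.
./asadmin create-jvm-options '-Ddataverse.files.globusr.type=globus'
./asadmin create-jvm-options '-Ddataverse.files.globusr.label=globusRemote'
./asadmin create-jvm-options '-Ddataverse.files.globusr.reference-endpoints-with-basepaths=<endpoint-id>/<base-path>'
```
Because Dataverse never contacts the remote endpoint itself in this mode, no Globus credentials are configured for the store; as with other overlay stores, a baseStore is still needed for ancillary files.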
diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst
index 5b2b6982866..37d80d0a6cd 100644
--- a/doc/sphinx-guides/source/developers/globus-api.rst
+++ b/doc/sphinx-guides/source/developers/globus-api.rst
@@ -71,7 +71,7 @@ The getDatasetMetadata and getFileListing URLs are just signed versions of the s
 If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths", and the requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are
-describe further below.
+described further below.
 The call to set up for a transfer out (download) is similar:

From 1d668970df1562c3cbc85d60be2abc55d8a96572 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller 
Date: Fri, 8 Dec 2023 15:56:27 -0500
Subject: [PATCH 343/414] #10151 standard guide links
---
 doc/release-notes/6.1-release-notes.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md
index b6bb7d8b806..24194a02026 100644
--- a/doc/release-notes/6.1-release-notes.md
+++ b/doc/release-notes/6.1-release-notes.md
@@ -49,14 +49,14 @@ Organization-Email: support@dataverse.harvard.edu
 Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.
-For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt
+For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt
 ### Improvements in the dataset versions API
 - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions
 - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output
 - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files.
-This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide.
+This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide.
 ### The following API endpoints have been added:
 - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version).
@@ -128,13 +128,13 @@ Dataverse installations that have been using archival Bags may wish to update
 any existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse
 [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls)
 to generate updated versions. 
-- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews +- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) From 85206de08acb6a8373199fb0d4eec2768cb6763d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Dec 2023 15:59:21 -0500 Subject: [PATCH 344/414] simply API changelog to be about breaking changes only #10151 --- doc/release-notes/6.1-release-notes.md | 2 +- doc/sphinx-guides/source/api/changelog.rst | 19 +++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 24194a02026..a3b04749d68 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -134,7 +134,7 @@ to generate updated versions. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html +- We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. 
The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465)

diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst
index 910134e14f3..20225b99b5c 100644
--- a/doc/sphinx-guides/source/api/changelog.rst
+++ b/doc/sphinx-guides/source/api/changelog.rst
@@ -1,5 +1,7 @@
-API Changelog
-=============
+API Changelog (Breaking Changes)
+================================
+
+This API changelog is experimental and we would love feedback on its usefulness. Its primary purpose is to inform API developers of any breaking changes. (We try not to ship any backward incompatible changes, but it happens.) To see a list of new APIs and backward-compatible changes to existing API, please see each version's release notes at https://github.com/IQSS/dataverse/releases
 .. contents:: |toctitle|
 	:local:
@@ -8,20 +10,9 @@ API Changelog
 v6.1
 ----
-New
-~~~
-- **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`.
-- **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`.
-- **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`.
-- **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`.
-Changes
-~~~~~~~
-- **/api/datasets/{id}/versions/{versionId}/citation**: This endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. See :ref:`get-citation`.
+- The metadata field "Alternative Title" now supports multiple values so you must pass an array rather than a string when populating that field via API. See https://github.com/IQSS/dataverse/pull/9440
 v6.0
 ----
-Changes
-~~~~~~~
 - **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result in a ``401`` error response. Previously, the download was allowed (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. See :doc:`dataaccess`.

From 0cd87d167211ee6bc047de3cba3e79acfb520e28 Mon Sep 17 00:00:00 2001
From: qqmyers 
Date: Fri, 8 Dec 2023 16:37:03 -0500
Subject: [PATCH 345/414] address Review comments
---
 .../source/admin/integrations.rst             | 12 ++++++++
 doc/sphinx-guides/source/api/intro.rst        |  4 +++
 .../source/developers/globus-api.rst          |  6 ++--
 .../source/installation/config.rst            |  9 +++---
 .../edu/harvard/iq/dataverse/DatasetPage.java | 29 -------------------
 .../AbstractRemoteOverlayAccessIO.java        |  2 +-
 6 files changed, 25 insertions(+), 37 deletions(-)

diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst
index 9a24cf0715c..db566106b49 100644
--- a/doc/sphinx-guides/source/admin/integrations.rst
+++ b/doc/sphinx-guides/source/admin/integrations.rst
@@ -121,6 +121,18 @@ Its goal is to make the dashboard adjustable for a Dataverse installation's need
 The integrations dashboard is currently in development. 
A preview and more information can be found at: `rdm-integration GitHub repository `_ +Globus +++++++ + +Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: + +* robust file transfer capable of restarting after network or endpoint failures +* third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation + +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +and a community-developed `dataverse-globus `_ app has been properly installed and configured. + + Embedding Data on Websites -------------------------- diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst index 6c61bb8c20d..8eb11798dd7 100755 --- a/doc/sphinx-guides/source/api/intro.rst +++ b/doc/sphinx-guides/source/api/intro.rst @@ -187,6 +187,10 @@ Lists of Dataverse APIs - Files - etc. +- :doc:`/developers/dataset-semantic-metadata-api`: For creating, reading, editing, and deleting dataset metadata using JSON-LD. +- :doc:`/developers/dataset-migration-api`: For migrating datasets from other repositories while retaining the original persistent identifiers and publication date. +- :doc:`/developers/s3-direct-upload-api`: For the transfer of larger files/larger numbers of files directly to an S3 bucket managed by Dataverse. +- :doc:`/developers/globus-api`: For the Globus transfer of larger files/larger numbers of files directly via Globus endpoints managed by Dataverse or referencing files in remote endpoints. - :doc:`metrics`: For query statistics about usage of a Dataverse installation. - :doc:`sword`: For depositing data using a standards-based approach rather than the :doc:`native-api`. 
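As a concrete companion to the API list above, the two dataset JSON validation endpoints added in this release can be exercised with plain curl. This is a hedged sketch: the collection alias `root` and the file name `dataset.json` are illustrative, and the POST-with-file-body shape for the validation call is an assumption rather than something spelled out in this series.
```
export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
export SERVER_URL=https://demo.dataverse.org
export ALIAS=root

# Fetch the custom dataset schema derived from the collection's required fields
curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ALIAS/datasetSchema"

# Validate a local dataset JSON file against that schema (assumed POST of the JSON body)
curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type: application/json" \
  --upload-file dataset.json "$SERVER_URL/api/dataverses/$ALIAS/validateDatasetJson"
```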
diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst
index 5b2b6982866..de9df06a798 100644
--- a/doc/sphinx-guides/source/developers/globus-api.rst
+++ b/doc/sphinx-guides/source/developers/globus-api.rst
@@ -160,11 +160,11 @@ In the managed case, once a Globus transfer has been initiated a final API call
   export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
   export SERVER_URL=https://demo.dataverse.org
   export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV
-  export JSON_DATA="{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \
+  export JSON_DATA='{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \
   "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \
-  {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}"
+  {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}'

-  curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA""
+  curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles" -F "jsonData=$JSON_DATA"

 Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL.

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index 4540219fc7c..f6c05a3bde8 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -499,8 +499,8 @@ Logging & Slow Performance
 .. _file-storage:
-File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores and/or Globus Stores
---------------------------------------------------------------------------------------------------------------------------
+File Storage
+------------
 By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\<id\>.directory`` JVM option described below.
@@ -999,7 +999,8 @@ See :doc:`/developers/big-data-support` for additional information on how to use
 In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores and options to specify and access a Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.).
 These and other available options are described in the table below. 
-There are two types of Globus stores +There are two types of Globus stores: + - managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse - remote - where Dataverse references files that remain on trusted remote Globus endpoints @@ -1024,7 +1025,7 @@ Once you have configured a globus store, it is recommended that you install the dataverse.files..globus-token A Globus token (base64 endcoded : for a managed store) - using a microprofile alias is recommended (none) dataverse.files..reference-endpoints-with-basepaths A comma separated list of *remote* trusted Globus endpoint id/s (none) - dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be true for S3 Connector-based *managed* stores ``false`` + dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be false for S3 Connector-based *managed* stores, true for others ``false`` ======================================================= ================== ========================================================================== =================== diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 9c7d599ba33..b79f387f20b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5312,36 +5312,7 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ } return false; } -/* These appear to be unused - toDo - delete - private Boolean downloadButtonAllEnabled = null; - public boolean isDownloadAllButtonEnabled() { - - if (downloadButtonAllEnabled == null) { - for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (!this.fileDownloadHelper.canDownloadFile(fmd)) { - downloadButtonAllEnabled = false; - break; - } - } - downloadButtonAllEnabled = true; - } - return downloadButtonAllEnabled; - } - - public boolean isDownloadSelectedButtonEnabled(){ - - if( this.selectedFiles == null || this.selectedFiles.isEmpty() ){ - return false; - } - for (FileMetadata fmd : this.selectedFiles){ - if (this.fileDownloadHelper.canDownloadFile(fmd)){ - return true; - } - } - return false; - } -*/ public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 6c26502acfa..10ff68a56f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -50,7 +50,7 @@ public abstract class AbstractRemoteOverlayAccessIO extends protected static final String REMOTE_STORE_URL = "remote-store-url"; // Whether Dataverse can access the file bytes - //Currently True for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + // Currently False only for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; protected StorageIO baseStore = null; From 9dd3f9785c6a5c8939bd9f023400f5f10c3ef58d Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 11 Dec 2023 
09:28:16 +0000
Subject: [PATCH 346/414] Added: release notes for #10155
---
 .../10155-datasets-can-download-at-least-one-file.md | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 doc/release-notes/10155-datasets-can-download-at-least-one-file.md

diff --git a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md
new file mode 100644
index 00000000000..566d505f7ca
--- /dev/null
+++ b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md
@@ -0,0 +1,3 @@
+The getCanDownloadAtLeastOneFile (/api/datasets/{id}/versions/{versionId}/canDownloadAtLeastOneFile) endpoint has been created.
+
+This endpoint allows you to know if the calling user can download at least one file of a particular dataset version.

From 9fb44d3d45080a2e5c9de15ab0445cc052c956b3 Mon Sep 17 00:00:00 2001
From: GPortas 
Date: Mon, 11 Dec 2023 09:33:56 +0000
Subject: [PATCH 347/414] Added: docs for #10155
---
 doc/sphinx-guides/source/api/native-api.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 56190dd342c..99438520120 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -2686,6 +2686,19 @@ In particular, the user permissions that this API call checks, returned as boole
   curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/userPermissions"
+Know if a User can download at least one File from a Dataset Version
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This API call allows you to know if the calling user can download at least one file of a dataset version.
+
+.. code-block:: bash
+
+  export SERVER_URL=https://demo.dataverse.org
+  export ID=24
+  export VERSION=1.0
+
+  curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/versions/$VERSION/canDownloadAtLeastOneFile"
+
 Files
 -----

From ca706662cd9f19b36d31530cf2747d810923ca3e Mon Sep 17 00:00:00 2001
From: qqmyers 
Date: Mon, 11 Dec 2023 11:06:36 -0500
Subject: [PATCH 348/414] bug fix - allowing S3 w/Globus config to work for download
---
 .../iq/dataverse/dataaccess/GlobusAccessibleStore.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
index e4d062f0619..8bed60d8302 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
@@ -65,7 +65,11 @@ public static String getGlobusToken(String storeId) {
     }
 
     public static boolean isGlobusAccessible(String storeId) {
-        if(StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE).equals(DataAccess.GLOBUS)) {
+        String type = StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE);
+        if (type.equals(DataAccess.GLOBUS)) {
+            return true;
+        } else if (type.equals(DataAccess.S3)
+                && StorageIO.getConfigParamForDriver(storeId, TRANSFER_ENDPOINT_WITH_BASEPATH) != null) {
             return true;
         }
         return false;

From 09a227b30a2b5da05829297a9173952596e2df9c Mon Sep 17 00:00:00 2001
From: qqmyers 
Date: Mon, 11 Dec 2023 11:12:04 -0500
Subject: [PATCH 349/414] Change docs to make clear that an S3 store can be used
---
 doc/sphinx-guides/source/installation/config.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index f6c05a3bde8..a7d7905ca4a 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -1004,10 +1004,10 @@ There are two types of Globus stores:
 - managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse
 - remote - where Dataverse references files that remain on trusted remote Globus endpoints
-For managed stores, there are two variants, connecting to standard/file-based Globus endpoints and to endpoints using an underlying S3 store via the Globus S3 Connector.
+A managed Globus store connects to a standard/file-based Globus endpoint. It is also possible to configure an S3 store as a managed store, if the managed endpoint uses an underlying S3 store via the Globus S3 Connector.
 With the former, Dataverse has no direct access to the file contents and functionality related to ingest, fixity hash validation, etc. are not available. With the latter, Dataverse can access files internally via S3 and the functionality supported is similar to that when using S3 direct upload.
-Once you have configured a globus store, it is recommended that you install the `dataverse-globus app <https://github.com/scholarsportal/dataverse-globus>`_ to allow transfers in/out of Dataverse to be initated via the Dataverse user interface. Alternately, you can point your users to the :doc:`/developers/globus-api` for information about API support.
+Once you have configured a globus store, or configured an S3 store for Globus access, it is recommended that you install the `dataverse-globus app <https://github.com/scholarsportal/dataverse-globus>`_ to allow transfers in/out of Dataverse to be initiated via the Dataverse user interface. Alternately, you can point your users to the :doc:`/developers/globus-api` for information about API support.

From 44bd5b7fb6d697d356d857a73847e1637aaa5763 Mon Sep 17 00:00:00 2001
From: Steven Winship 
Date: Mon, 11 Dec 2023 11:19:46 -0500
Subject: [PATCH 350/414] add perf test results
---
 doc/release-notes/6.1-release-notes.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md
index a3b04749d68..b03a7a62baa 100644
--- a/doc/release-notes/6.1-release-notes.md
+++ b/doc/release-notes/6.1-release-notes.md
@@ -258,7 +258,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa
+## Performance Testing Results +The results of performance testing can be found here: +https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 + ## Getting Help For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 173b8a7a067b392de8e1c900c3e1d9eb806c71d6 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:25:44 -0500 Subject: [PATCH 351/414] fix backward comp Alternative Title --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index b03a7a62baa..5bc0df4640c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -291,7 +291,7 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1959f2ff22d9bbc4290a586fc49f1f49eccdbd04 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:29:24 -0500 Subject: [PATCH 352/414] removed unneeded header --- doc/release-notes/6.1-release-notes.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 5bc0df4640c..6d3d1912f81 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -6,11 +6,8 @@ This release brings new features, enhancements, and bug fixes to the Dataverse s Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. ## Release Highlights (Major Upgrades, Breaking Changes) - This release contains major upgrades to core components. Detailed upgrade instructions can be found below. -## Detailed Release Highlights, New Features and Use Case Scenarios - ### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. 
From a4e25e17155896ae5c335ea8169229f248eaf22b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 12:15:56 -0500 Subject: [PATCH 353/414] reorg 6.1 release notes, add globus #10151 --- doc/release-notes/6.1-release-notes.md | 262 +++++++++++++------------ 1 file changed, 137 insertions(+), 125 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 6d3d1912f81..475d4fc0887 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -5,57 +5,96 @@ Please note: To read these instructions in full, please go to https://github.com This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. -## Release Highlights (Major Upgrades, Breaking Changes) -This release contains major upgrades to core components. Detailed upgrade instructions can be found below. +## Release highlights -### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). - The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. +### Guestbook at request + +Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` option) to display any configured guestbook to users when they request restricted files (new functionality) or when they download files (previous behavior). + +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. + +### Collection-level storage quotas + +This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of Dataverse 6.1. + +### Globus support + +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. 
Please note: + +- Globus functionality remains experimental/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. +- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model. +- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus). +- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) + +Backward incompatibilities: +- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism +- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. + +New JVM options: +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. + +Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used + +Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) + +### Alternative Title now allows multiple values + +Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below for details. + +### External tools: configure tools now available at the dataset level + +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. + +### S3 out-of-band upload -### Dataverse installation can be now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. -### Alternative Title is made repeatable. -- One will need to update database with updated citation block. - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update Solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` - Reload Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +### JSON Schema for datasets -Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -### Collection Storage Size Quota Support --This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 +### OpenID Connect authentication provider improvements -### BagIT Export Configurations Updated -For BagIT export, it is now possible to configure the following information in bag-info.txt... +#### Using MicroProfile Config for provisioning -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu +With this release it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. -... 
using new JVM/MPCONFIG options: +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email +#### Adding PKCE Support -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) +Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) -For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +### Solr improvements -### Improvements in the dataset versions API -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. +As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. +Please see the "Installing Solr" section of the Installation Prerequisites guide. + +### New release of Dataverse Previewers (including a Markdown previewer) + +Version 1.4 of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. Please note: + +- SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. +- Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. +- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews + +### New or improved APIs + +The development of a [new UI for Dataverse](https://github.com/IQSS/dataverse-frontend) is driving the addition or improvement of many APIs. + +#### New API endpoints -### The following API endpoints have been added: - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). 
- /api/files/{id}/downloadCount - /api/files/{id}/dataTables @@ -71,7 +110,33 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. - downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. -### Extended the existing endpoints: +#### Pagination of files in dataset versions + +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. + + +#### DataFile API payload has been extended to include the following fields + +- tabularData: Boolean field to know if the DataFile is of tabular type +- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +#### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering + +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". + +#### Additional improvements to existing API endpoints + - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. - getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. Added support for filtering by optional criteria query parameter: - contentType @@ -93,25 +158,21 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. 
-### DataFile API payload has been extended to include the following fields: -- tabularData: Boolean field to know if the DataFile is of tabular type -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) -- friendlyType: String +### Improvements for developers -### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering -- Access status: through the `accessStatus` query parameter, which supports the following values: - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (with the Payara Platform Tools plugin). For details, see https://guides.dataverse.org/en/6.1/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. +- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. +- With this release, we add a new type of testing to Dataverse: integration tests which are not end-to-end tests (like our API tests). Starting with OIDC authentication support, we test regularly on CI to ensure both OIDC login options work in UI and API. +- The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. +- The support for setting JVM options during testing has been improved for developers. You may now add the `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +## Major use cases and infrastructure enhancements -### Misc -- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. -- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. +Changes and fixes in this release not already mentioned above include: + +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block.
This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -125,68 +186,18 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt + - Source-Organization from `dataverse.bagit.sourceorg.name`. + - Organization-Address from `dataverse.bagit.sourceorg.address`. + - Organization-Email from `dataverse.bagit.sourceorg.email`. - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. - - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed.
- -### Solr Improvements -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. - - -### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. -- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. -- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. -- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. - -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. 
- -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. - -## New Configuration Options +## New configuration options - dataverse.auth.oidc.enabled - dataverse.auth.oidc.client-id - dataverse.auth.oidc.client-secret - dataverse.auth.oidc.auth-server-url - dataverse.auth.oidc.pkce.enabled - dataverse.auth.oidc.pkce.method - dataverse.auth.oidc.title - dataverse.auth.oidc.subtitle - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age - dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +## Backward incompatibilities + +- Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string. For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- See "Globus support" above for backward incompatibilities specific to Globus. + +## Complete list of changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. + ## Installation If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! @@ -209,7 +236,7 @@ Once you are in production, we would be delighted to update our [map of Datavers You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). -## Upgrade Instructions +## Upgrade instructions Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. @@ -241,6 +268,8 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `$PAYARA/bin/asadmin deploy dataverse-6.1.war` +As noted above, deployment of the war file might take several minutes due to a database migration script required for the new storage quotas feature. + 5\.
Restart Payara - `service payara stop` - `service payara start` @@ -255,7 +284,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). +7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). 7a\. For installations without custom or experimental metadata blocks: @@ -285,20 +314,3 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as - Restart Solr instance (usually `service solr restart` depending on solr/OS) 8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). - - -## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) -- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, - -## Complete List of Changes - -For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. - -## Performance Testing Results -The results of performance testing can be found here: -https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 - -## Getting Help - -For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 011b9291e6f694631d237bd047c3a170e6e93a2e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 13:58:08 -0500 Subject: [PATCH 354/414] remove globus snippet (already added) #10151 --- doc/release-notes/10162-globus-support.md | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 doc/release-notes/10162-globus-support.md diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md deleted file mode 100644 index 60670b5b101..00000000000 --- a/doc/release-notes/10162-globus-support.md +++ /dev/null @@ -1,19 +0,0 @@ -Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, -and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration.
-Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) -- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. -- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model -- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) -- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) - -Backward Incompatibilities: -- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism -- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. - -New JVM Options: -- A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). -- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. - - - -Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used From 3e32f42959dce41e9c21c9e2285fdf719b048dc0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 14:57:52 -0500 Subject: [PATCH 355/414] link to guides in more places, other tweaks #10151 --- doc/release-notes/6.1-release-notes.md | 43 +++++++++++++------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 475d4fc0887..fab11ce4959 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -13,12 +13,17 @@ Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. +For details, see [dataverse.files.guestbook-at-request](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-guestbook-at-request) and PR #9599. + ### Collection-level storage quotas This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. + Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. 
On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the initial deployment of Dataverse 6.1. -### Globus support +For details, see [Storage Quotas for Collections](https://guides.dataverse.org/en/6.1/admin/collectionquotas.html) in the Admin Guide. + +### Globus support (experimental), continued Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Please note: @@ -32,54 +37,50 @@ Backward incompatibilities: - The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. New JVM options: -- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage](https://guides.dataverse.org/en/6.1/installation/config.html#file-storage) section of the Installation Guide. - dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used -Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) +Further details can be found in the [Big Data Support](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) section of the Developer Guide. ### Alternative Title now allows multiple values -Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below for details. +Alternative Title now allows multiple values. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below and PR #9440 for details. ### External tools: configure tools now available at the dataset level -Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See [External Tools](https://guides.dataverse.org/en/6.1/admin/external-tools.html#dataset-level-configure-tools) in the Admin Guide and PR #9925. ### S3 out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003).
This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. ### JSON Schema for datasets -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10109. -### OpenID Connect authentication provider improvements +### OpenID Connect (OIDC) improvements #### Using MicroProfile Config for provisioning -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. +With this release it is possible to provision a single OIDC-based authentication provider by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. 
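As a minimal sketch of what such provisioning might look like, assuming the standard MicroProfile Config mapping of the `dataverse.auth.oidc.*` options (listed under "New configuration options" below) to environment variables; the client details here are placeholders, and the OIDC section of the Installation Guide linked below is the authoritative reference:

```bash
export DATAVERSE_AUTH_OIDC_ENABLED=1
export DATAVERSE_AUTH_OIDC_CLIENT_ID=my-dataverse-client        # placeholder client ID
export DATAVERSE_AUTH_OIDC_CLIENT_SECRET=s3cr3t                 # placeholder secret
export DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL=https://keycloak.example.org/realms/test
```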
-If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. +If you are using an external OIDC provider component as an identity management system and/or broker to other authentication providers such as Google, eduGain SAML and so on, this might make your life easier during instance setups and reconfiguration. You no longer need to generate the necessary JSON file. #### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as an additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) +Some OIDC providers require using PKCE as an additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +For both features, see the [OIDC](https://guides.dataverse.org/en/6.0/installation/oidc.html) section of the Installation Guide and PR #9273. ### Solr improvements As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -Please see the "Installing Solr" section of the Installation Prerequisites guide. +Please see the [Installing Solr](https://guides.dataverse.org/en/6.1/installation/prerequisites.html#installing-solr) section of the Installation Guide. ### New release of Dataverse Previewers (including a Markdown previewer) @@ -87,7 +88,7 @@ Version 1.4 of the standard Dataverse Previewers from https://github.com/gdcc/da - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- There is now a [Markdown (.md)](https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews) previewer. ### New or improved APIs @@ -172,7 +173,7 @@ This parameter applies a filter criteria to the operation and supports the follo Changes and fixes in this release not already mentioned above include: -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. See PR #10142.
- Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -184,7 +185,7 @@ recreate datasets from archival Bags will start indicating which version(s) of t OAI_ORE format they can read. Dataverse installations that have been using archival Bags may wish to update any existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +[archival Bag export API](https://guides.dataverse.org/en/6.1/installation/config.html#bagit-export-api-calls) to generate updated versions. - For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - Source-Organization from `dataverse.bagit.sourceorg.name`. From 92a298da25c03822c848e5a43253f039193665f9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 15:42:55 -0500 Subject: [PATCH 356/414] add missing new config options and sort #10151 --- doc/release-notes/6.1-release-notes.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index fab11ce4959..1e09a207104 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -200,25 +200,28 @@ ## New configuration options -- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.auth-server-url - dataverse.auth.oidc.client-id - dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.enabled - dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.max-cache-age +- dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title - dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age -- dataverse.files.{driverId}.upload-out-of-band +- dataverse.auth.oidc.title +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email +- dataverse.bagit.sourceorg.name - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +- dataverse.files.{driverId}.upload-out-of-band ## Backward incompatibilities - Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string. For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. -- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new `dataverse.bagit` JVM options mentioned above.
For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - See "Globus support" above for backward incompatibilities specific to Globus. ## Complete list of changes For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. ## Getting help For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 80634c7a59d7bfce4ab0e871d80d34f446579123 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 15:54:30 -0500 Subject: [PATCH 357/414] address feedback from review #9919 --- doc/sphinx-guides/source/developers/performance.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst index aa50cd6e40c..46c152f322e 100644 --- a/doc/sphinx-guides/source/developers/performance.rst +++ b/doc/sphinx-guides/source/developers/performance.rst @@ -116,12 +116,12 @@ We'd like to rate limit commands (CreateDataset, etc.) so that we can keep them Solr ~~~~ -While in the past Solr performance hasn't been much of a concern, in recent years we've noticed performance problems when Harvard Dataverse is under load. We are investigating in `#9635 <https://github.com/IQSS/dataverse/issues/9635>`_. +While in the past Solr performance hasn't been much of a concern, in recent years we've noticed performance problems when Harvard Dataverse is under load. Improvements were made in `PR #10050 <https://github.com/IQSS/dataverse/pull/10050>`_, for example. Datasets with Large Numbers of Files or Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We'd like to scale Dataverse to better handle large number of files or versions (`#9763 <https://github.com/IQSS/dataverse/issues/9763>`_). +We'd like to scale Dataverse to better handle large numbers of files or versions. Progress was made in `PR #9883 <https://github.com/IQSS/dataverse/pull/9883>`_. Withstanding Bots ~~~~~~~~~~~~~~~~~ @@ -183,7 +183,7 @@ Most likely there is training available that is oriented toward performance. The Learn from the Community How They Monitor Performance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some members of the Dataverse community are likely users of newish tools like the ELK stack (Elasticsearch, Logstash, and Kibana), the TICK stack (Telegraph InfluxDB Chronograph and Kapacitor), GoAccess, Prometheus, Graphite, and more we haven't even heard of. In the :doc:`/admin/monitoring` section of the Admin Guide, we already encourage the community to share findings (, but we could dedicate time to this topic at our annual meeting or community calls. +Some members of the Dataverse community are likely users of newish tools like the ELK stack (Elasticsearch, Logstash, and Kibana), the TICK stack (Telegraph InfluxDB Chronograph and Kapacitor), GoAccess, Prometheus, Graphite, and more we haven't even heard of. In the :doc:`/admin/monitoring` section of the Admin Guide, we already encourage the community to share findings, but we could dedicate time to this topic at our annual meeting or community calls. Teach the Community to Do Performance Testing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 3d6343eca2846edca97e4d9699f3305fb7c19c62 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 16:09:46 -0500 Subject: [PATCH 358/414] mention configurable docroot #10151 --- doc/release-notes/6.1-release-notes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 1e09a207104..1279d09a023 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -195,6 +195,7 @@ to generate updated versions. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object.
This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. +- It's now possible to configure the docroot, which holds collection logos and more. See [dataverse.files.docroot](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-docroot) in the Installation Guide and PR #9819. - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. @@ -213,6 +214,7 @@ See also #10060. - dataverse.bagit.sourceorg.address - dataverse.bagit.sourceorg.address - dataverse.bagit.sourceorg.name +- dataverse.files.docroot - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request - dataverse.files.{driverId}.upload-out-of-band From fa32ef5a413f6b0fbfab7d6e96e602a31bc18ac4 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:36:52 +0000 Subject: [PATCH 359/414] Update doc/sphinx-guides/source/api/native-api.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 99438520120..1e86f24356b 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2686,7 +2686,7 @@ In particular, the user permissions that this API call checks, returned as boole curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/userPermissions" -Know if a User can download at least one File from a Dataset Version +Know If a User Can Download at Least One File from a Dataset Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This API call allows to know if the calling user can download at least one file of a dataset version. From 476977b48925ae6eae4dabf69b0de0d7d40d6841 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:37:01 +0000 Subject: [PATCH 360/414] Update doc/sphinx-guides/source/api/native-api.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1e86f24356b..9ceeb4410ef 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2689,7 +2689,7 @@ In particular, the user permissions that this API call checks, returned as boole Know If a User Can Download at Least One File from a Dataset Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This API call allows to know if the calling user can download at least one file of a dataset version. +This API endpoint indicates if the calling user can download at least one file from a dataset version. Note that Shibboleth group permissions are not considered. .. 
code-block:: bash From 64861afbc11c4475ca3d85e729f4b73e962d5efa Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:37:36 +0000 Subject: [PATCH 361/414] Update doc/release-notes/10155-datasets-can-download-at-least-one-file.md Co-authored-by: Philip Durbin --- .../10155-datasets-can-download-at-least-one-file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md index 566d505f7ca..a0b0d02310a 100644 --- a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md +++ b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md @@ -1,3 +1,3 @@ The getCanDownloadAtLeastOneFile (/api/datasets/{id}/versions/{versionId}/canDownloadAtLeastOneFile) endpoint has been created. -This endpoint allows to know if the calling user can download at least one file of a particular dataset version. +This API endpoint indicates if the calling user can download at least one file from a dataset version. Note that Shibboleth group permissions are not considered. From 39e4bcee0f164854301b45f0ba6cbd4e11b4cf5c Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 12 Dec 2023 13:42:46 +0000 Subject: [PATCH 362/414] Fixed: minio storage volume mapping --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5265a6b7c2d..6f8decc0dfb 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -209,7 +209,7 @@ services: networks: - dataverse volumes: - - minio_storage:/data + - ./docker-dev-volumes/minio_storage:/data environment: MINIO_ROOT_USER: 4cc355_k3y MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y From 0c279adc3e93bd09bedc08a3f1bda48876fc1de3 Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 12 Dec 2023 13:50:08 +0000 Subject: [PATCH 363/414] Removed: sleep calls from testGetCanDownloadAtLeastOneFile IT --- .../java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b2cf5c75467..f36b93b85ab 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -4225,7 +4225,7 @@ public void testGetGlobusUploadParameters() { } @Test - public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { + public void testGetCanDownloadAtLeastOneFile() { Response createUserResponse = UtilIT.createRandomUser(); createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); @@ -4252,9 +4252,6 @@ public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); - // Make sure the dataset is published - Thread.sleep(3000); - // Create a second user to call the getCanDownloadAtLeastOneFile method Response createSecondUserResponse = UtilIT.createRandomUser(); createSecondUserResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -4275,9 +4272,6 @@ public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", 
apiToken); publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); - // Make sure the dataset is published - Thread.sleep(3000); - // Call with a valid dataset id when a file is restricted and the user does not have access canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); From 960a20c79dc8a3292ff3d26973d8e35d8a4f481c Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 12 Dec 2023 14:06:21 -0500 Subject: [PATCH 364/414] #10168 fix error response status --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b3bfc476423..05355cbbc68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4288,7 +4288,7 @@ public Response getDatasetVersionArchivalStatus(@Context ContainerRequestContext headers); if (dsv.getArchivalCopyLocation() == null) { - return error(Status.NO_CONTENT, "This dataset version has not been archived"); + return error(Status.NOT_FOUND, "This dataset version has not been archived"); } else { JsonObject status = JsonUtil.getJsonObject(dsv.getArchivalCopyLocation()); return ok(status); From 40e5d39c73ec2097fb16d65e8fff33078168498b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 12 Dec 2023 14:53:45 -0500 Subject: [PATCH 365/414] how to test Docker images made during a release --- .../source/developers/making-releases.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 23c4773a06e..432b4ca2672 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -67,6 +67,19 @@ Once important tests have passed (compile, unit tests, etc.), merge the pull req If this is a hotfix release, skip this whole "merge develop to master" step (the "develop" branch is not involved until later). +(Optional) Test Docker Images +----------------------------- + +After the "master" branch has been updated and the GitHub Action to build and push Docker images has run (see `PR #9776 `_), go to https://hub.docker.com/u/gdcc and make sure the "alpha" tag for the following images has been updated: + +- https://hub.docker.com/r/gdcc/base +- https://hub.docker.com/r/gdcc/dataverse +- https://hub.docker.com/r/gdcc/configbaker + +To test these images against our API test suite, go to the "alpha" workflow at https://github.com/gdcc/api-test-runner/actions/workflows/alpha.yml and run it. + +If there are failures, additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. 
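One rough way to spot-check the freshly pushed images locally, using only the image names and "alpha" tags listed above:

.. code-block:: bash

    docker pull gdcc/base:alpha
    docker pull gdcc/dataverse:alpha
    docker pull gdcc/configbaker:alpha
    # Confirm the images were rebuilt recently
    docker image inspect gdcc/dataverse:alpha --format '{{.Created}}'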
+ Build the Guides for the Release -------------------------------- From daf89261174600b1db106974cc941213fa0b36bd Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 12 Dec 2023 15:37:27 -0500 Subject: [PATCH 366/414] #10168 update integration tests --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 928574eb82b..7efd44b9533 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3291,7 +3291,8 @@ public void testArchivalStatusAPI() throws IOException { //Verify the status is empty Response nullStatus = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken); - nullStatus.then().assertThat().statusCode(NO_CONTENT.getStatusCode()); + nullStatus.prettyPrint(); + nullStatus.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); //Set it Response setStatus = UtilIT.setDatasetVersionArchivalStatus(datasetId, "1.0", apiToken, "pending", @@ -3309,7 +3310,7 @@ public void testArchivalStatusAPI() throws IOException { //Make sure it's gone Response nullStatus2 = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken); - nullStatus2.then().assertThat().statusCode(NO_CONTENT.getStatusCode()); + nullStatus2.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); } From ea644b89a3149ff8599fe3fcaa3a2bf6f5804e71 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 13 Dec 2023 14:16:47 -0500 Subject: [PATCH 367/414] add "message sent" success message #2638 --- src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java | 2 ++ src/main/java/propertyFiles/Bundle.properties | 1 + src/main/webapp/contactFormFragment.xhtml | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java index 6be768321c4..68912969003 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java +++ b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.Optional; @@ -217,6 +218,7 @@ public String sendMessage() { } logger.fine("sending feedback: " + feedback); mailService.sendMail(feedback.getFromEmail(), feedback.getToEmail(), feedback.getCcEmail(), feedback.getSubject(), feedback.getBody()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("contact.sent")); return null; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 10576c0c116..0c6ce979a94 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -184,6 +184,7 @@ contact.context.file.intro={0}\n\nYou have just been sent the following message contact.context.file.ending=\n\n---\n\n{0}\n{1}\n\nGo to file {2}/file.xhtml?fileId={3}\n\nYou received this email because you have been listed as a contact for the dataset. If you believe this was an error, please contact {4} at {5}. 
To respond directly to the individual who sent the message, simply reply to this email. contact.context.support.intro={0},\n\nThe following message was sent from {1}.\n\n---\n\n contact.context.support.ending=\n\n---\n\nMessage sent from Support contact form. +contact.sent=Message sent. # dataverseuser.xhtml account.info=Account Information diff --git a/src/main/webapp/contactFormFragment.xhtml b/src/main/webapp/contactFormFragment.xhtml index cb4eb3d0872..8950ec5acf8 100644 --- a/src/main/webapp/contactFormFragment.xhtml +++ b/src/main/webapp/contactFormFragment.xhtml @@ -81,7 +81,7 @@
    + update="@form,messagePanel" oncomplete="if (args && !args.validationFailed) PF('contactForm').hide();" actionListener="#{sendFeedbackDialog.sendMessage}">
    From 4db74b6e5ddd3cf7f2ee49b94b9b229e2746bd35 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 5 Jan 2024 16:20:27 -0500 Subject: [PATCH 397/414] how to write release note snippets #9264 --- .../source/developers/making-releases.rst | 10 ++-- .../source/developers/version-control.rst | 54 ++++++++++++++++--- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index e73811a77e1..6b94282d55e 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -14,16 +14,18 @@ See :doc:`version-control` for background on our branching strategy. The steps below describe making both regular releases and hotfix releases. +.. _write-release-notes: + Write Release Notes ------------------- -Developers express the need for an addition to release notes by creating a file in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. +Developers express the need for an addition to release notes by creating a "release note snippet" in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. See :ref:`writing-release-note-snippets` for how this is described for contributors. -The task at or near release time is to collect these notes into a single doc. +The task at or near release time is to collect these snippets into a single file. - Create an issue in GitHub to track the work of creating release notes for the upcoming release. -- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the issue-specific release notes mentioned above. -- Delete the previously-created, issue-specific release notes as the content is added to the main release notes file. +- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the release note snippets mentioned above. +- Delete the release note snippets as the content is added to the main release notes file. - Include instructions to describe the steps required to upgrade the application from the previous version. These must be customized for release numbers and special circumstances such as changes to metadata blocks and infrastructure. - Take the release notes .md through the regular Code Review and QA process. 
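As a rough sketch of that collection step, using the snippet filename mentioned above as an example and a hypothetical 6.1 release notes file:

.. code-block:: bash

    cd doc/release-notes
    ls *.md                              # review the accumulated snippets
    cat 5053-apis-custom-homepage.md     # fold each snippet's content into 6.1-release-notes.md, editing as needed
    git rm 5053-apis-custom-homepage.md  # delete the snippet once its content has been merged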
diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 91f59c76e61..12f3d5b81fd 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -65,23 +65,65 @@ The example of creating a pull request below has to do with fixing an important Find or Create a GitHub Issue ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Issue is a bug (unexpected behavior) or a new feature in Dataverse, to know how to find or create an issue in dataverse please see https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md +An issue represents a bug (unexpected behavior) or a new feature in Dataverse. We'll use the issue number in the branch we create for our pull request. -For guidance on which issue to work on, please ask! with email to support@dataverse.org +Finding GitHub Issues to Work On +******************************** -Let's say you want to tackle https://github.com/IQSS/dataverse/issues/3728 which points out a typo in a page of the Dataverse Software's documentation. +Assuming this is your first contribution to Dataverse, you should start with something small. The following issue labels might be helpful in your search: + +- `good first issue <https://github.com/IQSS/dataverse/labels/good%20first%20issue>`_ (these appear at https://github.com/IQSS/dataverse/contribute ) +- `hacktoberfest <https://github.com/IQSS/dataverse/labels/hacktoberfest>`_ +- `Help Wanted: Code <https://github.com/IQSS/dataverse/labels/Help%20Wanted%3A%20Code>`_ +- `Help Wanted: Documentation <https://github.com/IQSS/dataverse/labels/Help%20Wanted%3A%20Documentation>`_ + +For guidance on which issue to work on, please ask! :ref:`getting-help-developers` explains how to get in touch. + +Creating GitHub Issues to Work On +********************************* + +You are very welcome to create a GitHub issue to work on. However, for significant changes, please reach out (see :ref:`getting-help-developers`) to make sure the team and community agree with the proposed change. + +For small changes and especially typo fixes, please don't worry about reaching out first. + +Communicate Which Issue You Are Working On +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the issue you can simply leave a comment to say you're working on it. If you tell us your GitHub username we are happy to add you to the "read only" team at https://github.com/orgs/IQSS/teams/dataverse-readonly/members so that we can assign the issue to you while you're working on it. You can also tell us if you'd like to be added to the `Dataverse Community Contributors spreadsheet `_. Create a New Branch Off the develop Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing, and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. +Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing (e.g. `#3728 <https://github.com/IQSS/dataverse/issues/3728>`_) and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells.
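A rough sketch of the commands involved, using the example branch name above and assuming a remote named "upstream" that points at IQSS/dataverse:

.. code-block:: bash

    git checkout develop
    git pull upstream develop            # make sure develop is up to date
    git checkout -b 3728-doc-apipolicy-fix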
Commit Your Change to Your New Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Making a commit (or several commits) to that branch, enter a description of the changes you have made. Ideally the first line of your commit message includes the number of the issue you are addressing, such as ``Fixed BlockedApiPolicy #3728``. +For each commit to that branch, try to include the issue number along with a summary in the first line of the commit message, such as ``Fixed BlockedApiPolicy #3728``. You are welcome to write longer descriptions in the body as well! + +.. _writing-release-note-snippets: + +Writing a Release Note Snippet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We highly value your insight as a contributor when it comes to describing your work in our release notes. Not every pull request will be mentioned in release notes but most are. + +As described at :ref:`write-release-notes`, at release time we compile together release note "snippets" into the final release notes. + +Here's how to add a release note snippet to your pull request: + +- Create a Markdown file under ``doc/release-notes``. You can reuse the name of your branch and append ".md" to it, e.g. ``3728-doc-apipolicy-fix.md`` +- Edit the snippet to include anything you think should be mentioned in the release notes, such as: + + - Descriptions of new features + - Explanations of bugs fixed + - New configuration settings + - Upgrade instructions + - Etc. + +Release note snippets do not need to be long. For a new feature, a single line description might be enough. Please note that your release note will likely be edited (expanded or shortened) when the final release notes are being created. Push Your Branch to GitHub ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 826d4bdcd2d0418c8d65c8409107de0d66f6dd19 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Jan 2024 17:46:26 -0500 Subject: [PATCH 398/414] per QA --- doc/sphinx-guides/source/developers/globus-api.rst | 1 + .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index de9df06a798..2f922fb1fc0 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -2,6 +2,7 @@ Globus Transfer API =================== The Globus API addresses three use cases: + * Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) * Reference of files that will remain in a remote Globus endpoint * Transfer from a Dataverse-managed Globus endpoint diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 61884045f35..3e60441850b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -240,7 +240,7 @@ private int makeDir(GlobusEndpoint endpoint, String dir) { MakeRequestResponse result = null; String body = "{\"DATA_TYPE\":\"mkdir\",\"path\":\"" + dir + "\"}"; try { - logger.info(body); + logger.fine(body); URL url = new URL( "https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint.getId() + "/mkdir"); result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", body); From dbab6ca9269a93bd7d292b37b00c42dc0fbad55f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 8 Jan 2024 10:30:25 -0500 Subject: [PATCH
399/414] use name@email.xyz to match citation block #2638 From datasetfieldtype.datasetContactEmail.watermark --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index b1c38e52496..ece3f070cdd 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -155,7 +155,7 @@ contact.support=Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. -contact.from.emailPlaceholder=valid@email.org +contact.from.emailPlaceholder=name@email.xyz contact.subject=Subject contact.subject.required=Subject is required. contact.subject.selectTab.top=Select subject... From 2b1e5dd4bda6788f644c2737cf56310e7eaefb7d Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:10:58 -0500 Subject: [PATCH 400/414] Extend getVersionFiles API endpoint to include the total file count --- .../iq/dataverse/api/AbstractApiBean.java | 64 +++----- .../harvard/iq/dataverse/api/Datasets.java | 146 +++++------------- .../harvard/iq/dataverse/api/DatasetsIT.java | 98 ++++++------ 3 files changed, 108 insertions(+), 200 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 58565bcc9d6..2a2843c0494 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -1,29 +1,6 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingDataverse; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingDataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseRoleServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.MetadataBlockServiceBean; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; -import edu.harvard.iq.dataverse.UserServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; @@ -40,8 +17,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; -import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import 
edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; +import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -51,33 +28,30 @@ import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; -import java.io.InputStream; -import java.net.URI; -import java.util.Arrays; -import java.util.Collections; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.logging.Level; -import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonException; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; import jakarta.persistence.PersistenceContext; import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.Response.ResponseBuilder; import jakarta.ws.rs.core.Response.Status; +import java.io.InputStream; +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.logging.Level; +import java.util.logging.Logger; + import static org.apache.commons.lang3.StringUtils.isNumeric; /** @@ -661,7 +635,13 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } - + protected Response ok( JsonArrayBuilder bld , long totalCount) { + return Response.ok(Json.createObjectBuilder() + .add("status", ApiConstants.STATUS_OK) + .add("total_count", totalCount) + .add("data", bld).build()) + .type(MediaType.APPLICATION_JSON).build(); + } protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 094f2b88c92..56b9e8df319 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.api; +import com.amazonaws.services.s3.model.PartETag; import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; import edu.harvard.iq.dataverse.api.auth.AuthRequired; +import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; @@ -13,6 +15,7 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.dataaccess.*; import 
edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; @@ -23,92 +26,47 @@ import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AddLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeaccessionDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetPrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ImportFromFileSystemCommand; -import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; -import edu.harvard.iq.dataverse.engine.command.impl.ListVersionsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.MoveDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult; -import edu.harvard.iq.dataverse.engine.command.impl.RemoveLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetCurationStatusCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import 
edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; -import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; +import edu.harvard.iq.dataverse.makedatacount.*; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.metrics.MetricsUtil; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.ArchiverUtil; -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.EjbUtil; -import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.MarkupChecker; -import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.*; import edu.harvard.iq.dataverse.util.bagit.OREMap; -import edu.harvard.iq.dataverse.util.json.JSONLDUtil; -import edu.harvard.iq.dataverse.util.json.JsonLDTerm; -import edu.harvard.iq.dataverse.util.json.JsonParseException; -import edu.harvard.iq.dataverse.util.json.JsonUtil; -import edu.harvard.iq.dataverse.util.SignpostingResources; -import edu.harvard.iq.dataverse.search.IndexServiceBean; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import edu.harvard.iq.dataverse.util.json.*; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.WorkflowContext; -import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; -import edu.harvard.iq.dataverse.globus.GlobusServiceBean; -import 
edu.harvard.iq.dataverse.globus.GlobusUtil; +import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.inject.Inject; +import jakarta.json.*; +import jakarta.json.stream.JsonParsingException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import jakarta.ws.rs.*; +import jakarta.ws.rs.container.ContainerRequestContext; +import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Response.Status; +import org.apache.commons.lang3.StringUtils; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; + import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -117,45 +75,21 @@ import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.LocalDateTime; -import java.util.*; -import java.util.concurrent.*; -import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.*; import java.util.Map.Entry; +import java.util.concurrent.ExecutionException; +import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; -import jakarta.ejb.EJB; -import jakarta.ejb.EJBException; -import jakarta.inject.Inject; -import jakarta.json.*; -import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; -import jakarta.servlet.http.HttpServletResponse; -import jakarta.ws.rs.BadRequestException; -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.DefaultValue; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.NotAcceptableException; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.PUT; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.Produces; -import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; -import jakarta.ws.rs.core.Response.Status; + +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import org.apache.commons.lang3.StringUtils; -import org.glassfish.jersey.media.multipart.FormDataBodyPart; -import org.glassfish.jersey.media.multipart.FormDataContentDisposition; -import org.glassfish.jersey.media.multipart.FormDataParam; -import com.amazonaws.services.s3.model.PartETag; -import edu.harvard.iq.dataverse.settings.JvmSettings; @Path("datasets") public class Datasets extends AbstractApiBean { @@ -546,7 +480,9 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); } - return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria))); + // TODO: should we count the total every time or only when offset = 0? 
+ return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), + datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); }, getRequestUser(crc)); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 200cfbaf1ff..ace69a6c606 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1,77 +1,66 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersionFilesServiceBean; import edu.harvard.iq.dataverse.FileSearchCriteria; -import io.restassured.RestAssured; -import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static io.restassured.RestAssured.given; -import io.restassured.path.json.JsonPath; -import io.restassured.http.ContentType; -import io.restassured.response.Response; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.logging.Logger; -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.skyscreamer.jsonassert.JSONAssert; -import org.junit.jupiter.api.Disabled; -import jakarta.json.JsonObject; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static jakarta.ws.rs.core.Response.Status.OK; -import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; -import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; -import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import static jakarta.ws.rs.core.Response.Status.METHOD_NOT_ALLOWED; -import static jakarta.ws.rs.core.Response.Status.CONFLICT; -import static jakarta.ws.rs.core.Response.Status.NO_CONTENT; -import edu.harvard.iq.dataverse.DataFile; -import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import edu.harvard.iq.dataverse.authorization.DataverseRole; +import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIOTest; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; -import io.restassured.parsing.Parser; -import static io.restassured.path.json.JsonPath.with; -import io.restassured.path.xml.XmlPath; -import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; -import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; import edu.harvard.iq.dataverse.datavariable.VarGroup; import edu.harvard.iq.dataverse.datavariable.VariableMetadata; import edu.harvard.iq.dataverse.datavariable.VariableMetadataDDIParser; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; 
-import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.Files; +import io.restassured.RestAssured; +import io.restassured.http.ContentType; +import io.restassured.parsing.Parser; +import io.restassured.path.json.JsonPath; +import io.restassured.path.xml.XmlPath; +import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.ws.rs.core.Response.Status; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.hamcrest.CoreMatchers; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.skyscreamer.jsonassert.JSONAssert; + import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; +import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.with; +import static jakarta.ws.rs.core.Response.Status.*; import static java.lang.Thread.sleep; -import org.hamcrest.CoreMatchers; -import static org.hamcrest.CoreMatchers.containsString; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.hasItems; -import static org.hamcrest.CoreMatchers.startsWith; -import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.CoreMatchers.*; import static org.hamcrest.Matchers.contains; import static org.junit.jupiter.api.Assertions.*; @@ -3548,7 +3537,9 @@ public void getVersionFiles() throws IOException, InterruptedException { getVersionFilesResponsePaginated.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName1)) - .body("data[1].label", equalTo(testFileName2)); + .body("data[1].label", equalTo(testFileName2)) + .body("total_count", equalTo(5)); + String x = getVersionFilesResponsePaginated.prettyPrint(); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); @@ -3562,7 +3553,8 @@ public void getVersionFiles() throws IOException, InterruptedException { getVersionFilesResponsePaginated.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName3)) - .body("data[1].label", equalTo(testFileName4)); + .body("data[1].label", equalTo(testFileName4)) + .body("total_count", equalTo(5)); fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 0807b1fd64b076ef92029a16b1c3a946802c56b7 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:18:55 
-0500 Subject: [PATCH 401/414] fix format --- src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 2a2843c0494..419132f7ba7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -635,6 +635,7 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } + protected Response ok( JsonArrayBuilder bld , long totalCount) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) @@ -642,6 +643,7 @@ protected Response ok( JsonArrayBuilder bld , long totalCount) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } + protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) From 53e525d7ddddcc4fd055f45debc126f8b2340ffc Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:24:21 -0500 Subject: [PATCH 402/414] fix format --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index ace69a6c606..91aa33f6b1f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3539,7 +3539,6 @@ public void getVersionFiles() throws IOException, InterruptedException { .body("data[0].label", equalTo(testFileName1)) .body("data[1].label", equalTo(testFileName2)) .body("total_count", equalTo(5)); - String x = getVersionFilesResponsePaginated.prettyPrint(); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 622a676681a336fd78e89d1f6d21e3e703eb7d7a Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 9 Jan 2024 10:32:12 -0500 Subject: [PATCH 403/414] updated per review comments --- ...-extend-getVersionFiles-api-to-include-total-file-count.md | 2 ++ doc/sphinx-guides/source/api/native-api.rst | 4 +++- .../java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 1 - src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 4 ++-- 5 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md diff --git a/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md b/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md new file mode 100644 index 00000000000..80a71e9bb7e --- /dev/null +++ b/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md @@ -0,0 +1,2 @@ +The response of the getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been modified to include a total count of records available (totalCount:x). +This will aid in pagination by allowing the caller to know how many pages can be iterated through. The existing getVersionFileCounts API, which returns the count, will still be available.
\ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6591c983824..48fc16bf141 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1066,7 +1066,9 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" -This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters: +This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters. +To aid in pagination the Json response also includes the total number of rows (totalCount) available. +Usage example: .. code-block:: bash diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 419132f7ba7..bc94d7f0bcc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -639,7 +639,7 @@ protected Response ok( JsonArrayBuilder bld ) { protected Response ok( JsonArrayBuilder bld , long totalCount) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) - .add("total_count", totalCount) + .add("totalCount", totalCount) .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 56b9e8df319..3a2497d9418 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -480,7 +480,6 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); } - // TODO: should we count the total every time or only when offset = 0? 
return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); }, getRequestUser(crc)); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 91aa33f6b1f..5753550d564 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3538,7 +3538,7 @@ public void getVersionFiles() throws IOException, InterruptedException { .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName1)) .body("data[1].label", equalTo(testFileName2)) - .body("total_count", equalTo(5)); + .body("totalCount", equalTo(5)); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); @@ -3553,7 +3553,7 @@ public void getVersionFiles() throws IOException, InterruptedException { .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName3)) .body("data[1].label", equalTo(testFileName4)) - .body("total_count", equalTo(5)); + .body("totalCount", equalTo(5)); fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From dfb1795e1318d058c4b614894ce9cd1039da38d3 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 9 Jan 2024 17:37:06 +0000 Subject: [PATCH 404/414] Added: minor docs formatting tweaks --- doc/sphinx-guides/source/api/native-api.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 48fc16bf141..09fc3c69693 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1067,7 +1067,9 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters. -To aid in pagination the Json response also includes the total number of rows (totalCount) available. + +To aid in pagination the JSON response also includes the total number of rows (totalCount) available. + Usage example: .. code-block:: bash From b9bcf995b42889af3333368b3264f49264df52ef Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Tue, 9 Jan 2024 14:58:32 -0500 Subject: [PATCH 405/414] Update Kanban Board URL The URL was pointing to the old board. --- doc/sphinx-guides/source/developers/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index a01a8066897..f446b73de09 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -40,7 +40,7 @@ For the Dataverse Software development roadmap, please see https://www.iq.harvar Kanban Board ------------ -You can get a sense of what's currently in flight (in dev, in QA, etc.) by looking at https://github.com/orgs/IQSS/projects/2 +You can get a sense of what's currently in flight (in dev, in QA, etc.) 
by looking at https://github.com/orgs/IQSS/projects/34 Issue Tracker ------------- From 94570f0c670e6d39594c5cfb9ca5233962834de0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 10 Jan 2024 10:59:21 -0500 Subject: [PATCH 406/414] add toc to docs #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 2f922fb1fc0..b5d420467aa 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -1,6 +1,9 @@ Globus Transfer API =================== +.. contents:: |toctitle| + :local: + The Globus API addresses three use cases: * Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) * Reference of files that will remain in a remote Globus endpoint * Transfer from a Dataverse-managed Globus endpoint From b1bb6a047cc347a6d6c97ba9f56060d3805ec545 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:35:34 -0500 Subject: [PATCH 407/414] minor doc tweaks #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index b5d420467aa..96475f33230 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -72,7 +72,7 @@ The response includes the id for the Globus endpoint to use along with several s The getDatasetMetadata and getFileListing URLs are just signed versions of the standard Dataset metadata and file listing API calls. The other two are Globus specific. -If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a +If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are described further below. @@ -91,7 +91,7 @@ The returned response includes the same getDatasetMetadata and getFileListing UR Performing an Upload/Transfer In -------------------------------- -The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer or to reference files (based on the "managed" parameter). +The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer (managed=true) or to reference files (managed=false). Once the user identifies which files are to be added, the requestGlobusTransferPaths or requestGlobusReferencePaths URLs can be called.
These both reference the same API call but must be used with different entries in the JSON body sent: From 1c3162f01cb921b21a72042ea03b1e9ca94c6da9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:49:01 -0500 Subject: [PATCH 408/414] typo #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 96475f33230..57748d0afc9 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -170,7 +170,7 @@ In the managed case, once a Globus transfer has been initiated a final API call curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" -Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. +Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL. With this information, Dataverse will begin to monitor the transfer and when it completes, will add all files for which the transfer succeeded. As the transfer can take significant time and the API call is asynchronous, the only way to determine if the transfer succeeded via API is to use the standard calls to check the dataset lock state and contents. From 8cc2e7c0e5ba16b2f380f8fd31531e1f90271c12 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:56:50 -0500 Subject: [PATCH 409/414] fix path in globus endpoint docs #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 57748d0afc9..a9cfe5aedff 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -102,7 +102,7 @@ Once the user identifies which files are to be added, the requestGlobusTransferP export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV export LOCALE=en-US - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUpload" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUploadPaths" Note that when using the dataverse-globus app or the return from the previous call, the URL for this call will be signed and no API_TOKEN is needed. 
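As a rough sketch of the managed-store request documented above: the JSON body field names used here ("principal" for the Globus user id and "numberOfFiles") are illustrative assumptions rather than a confirmed schema, and the persistentId query parameter follows the usual native API convention:

.. code-block:: bash

    export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
    export SERVER_URL=https://demo.dataverse.org
    export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV

    # The JSON body fields below are placeholders for illustration only
    curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST \
      -d '{"principal":"00000000-0000-0000-0000-000000000000","numberOfFiles":2}' \
      "$SERVER_URL/api/datasets/:persistentId/requestGlobusUploadPaths?persistentId=$PERSISTENT_IDENTIFIER"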
From c3556e012a03b1e131146821faabb183b1a62a87 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 12:14:24 -0500 Subject: [PATCH 410/414] add missing trailing double quote #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index a9cfe5aedff..5a90243bd93 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -168,7 +168,7 @@ In the managed case, once a Globus transfer has been initiated a final API call "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}' - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles" -F "jsonData=$JSON_DATA" Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL. From 3a81926980edc7c8228dddf18a8f1305b32fc2c8 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 15:40:14 -0500 Subject: [PATCH 411/414] add requestGlobusUploadPaths to UtilIT #10200 --- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e29677c2252..33dda05b4d7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3718,4 +3718,12 @@ static Response requestGlobusDownload(Integer datasetId, JsonObject body, String .post("/api/datasets/" + datasetId + "/requestGlobusDownload"); } + static Response requestGlobusUploadPaths(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body.toString()) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusUploadPaths"); + } + } From 83120012480ce12ef8db3d33d3a1c93c4605945a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 15:47:17 -0500 Subject: [PATCH 412/414] clarify where taskIdentifier comes from #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 5a90243bd93..834db8161f0 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -157,7 +157,7 @@ In the remote/reference case, the map is from the initially supplied endpoint/pa Adding Files to the Dataset --------------------------- -In the managed case, once a Globus transfer has been initiated a final 
API call is made to Dataverse to provide it with the task identifier of the transfer and information about the files being transferred: +In the managed case, you must initiate a Globus transfer and take note of its task identifier. As in the JSON example below, you will pass it as ``taskIdentifier`` along with details about the files you are transferring: .. code-block:: bash From d86ab1587cb5088330c2df6565744769cc859119 Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Fri, 12 Jan 2024 11:36:30 +0100 Subject: [PATCH 413/414] test: use curator role in testListRoleAssignments --- scripts/api/data/role-contributor-plus.json | 12 ---------- .../harvard/iq/dataverse/api/DatasetsIT.java | 22 ++++--------------- 2 files changed, 4 insertions(+), 30 deletions(-) delete mode 100644 scripts/api/data/role-contributor-plus.json diff --git a/scripts/api/data/role-contributor-plus.json b/scripts/api/data/role-contributor-plus.json deleted file mode 100644 index ef9ba3aaff6..00000000000 --- a/scripts/api/data/role-contributor-plus.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "alias":"contributorPlus", - "name":"ContributorPlus", - "description":"For datasets, a person who can edit License + Terms, then submit them for review, and add collaborators.", - "permissions":[ - "ViewUnpublishedDataset", - "EditDataset", - "DownloadFile", - "DeleteDatasetDraft", - "ManageDatasetPermissions" - ] -} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b51d400d2d4..787b9b018a9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1349,17 +1349,11 @@ public void testListRoleAssignments() { Response notPermittedToListRoleAssignmentOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataset.getStatusCode()); - // We create a new role that includes "ManageDatasetPermissions" which are required for listing role assignments - // of a dataset and assign it to the contributor user + // We assign the curator role to the contributor user + // (includes "ManageDatasetPermissions" which are required for listing role assignments of a dataset, but not + // "ManageDataversePermissions") - String pathToJsonFile = "scripts/api/data/role-contributor-plus.json"; - Response addDataverseRoleResponse = UtilIT.addDataverseRole(pathToJsonFile, dataverseAlias, adminApiToken); - addDataverseRoleResponse.prettyPrint(); - String body = addDataverseRoleResponse.getBody().asString(); - String status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); - - Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "contributorPlus", "@" + contributorUsername, adminApiToken); + Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "curator", "@" + contributorUsername, adminApiToken); giveRandoPermission.prettyPrint(); assertEquals(200, giveRandoPermission.getStatusCode()); @@ -1373,14 +1367,6 @@ public void testListRoleAssignments() { notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); - - // Finally, we clean up and delete the role we created - - Response deleteDataverseRoleResponse = 
UtilIT.deleteDataverseRole("contributorPlus", adminApiToken); - deleteDataverseRoleResponse.prettyPrint(); - body = deleteDataverseRoleResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); } @Test From ff044632aff9c2b98aea01da934cfbf63476dc40 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 16 Jan 2024 11:32:17 -0500 Subject: [PATCH 414/414] add release note #9926 --- doc/release-notes/9926-list-role-assignments-permissions.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/9926-list-role-assignments-permissions.md diff --git a/doc/release-notes/9926-list-role-assignments-permissions.md b/doc/release-notes/9926-list-role-assignments-permissions.md new file mode 100644 index 00000000000..43cd83dc5c9 --- /dev/null +++ b/doc/release-notes/9926-list-role-assignments-permissions.md @@ -0,0 +1 @@ +Listing collection/dataverse role assignments via API still requires ManageDataversePermissions, but listing dataset role assignments via API now requires only ManageDatasetPermissions.
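To illustrate the permission change recorded in this release note, a minimal sketch of listing role assignments on a dataset through the native API (the token and dataset id values are placeholders); after this change the caller needs only ManageDatasetPermissions on the dataset rather than ManageDataversePermissions:

.. code-block:: bash

    export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
    export SERVER_URL=https://demo.dataverse.org
    export ID=24

    # Lists role assignments on the dataset; now gated on ManageDatasetPermissions only
    curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/$ID/assignments"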