From edd79d4814e1c733713a44a3d98ba2a22448748d Mon Sep 17 00:00:00 2001 From: Dmitri Bourlatchkov Date: Wed, 29 Jan 2025 15:38:35 -0500 Subject: [PATCH] Catalog: Add table config overrides to bucket configuration (#10296) * Catalog: Add table config overrides to bucket configuration * Add a new property map to `BucketOptions` to allow users to configure Iceberg table config overrides or provide custom table-level properties to Catalog clients. --- CHANGELOG.md | 3 + .../catalog/files/config/BucketOptions.java | 8 +- .../catalog/files/adls/AdlsObjectIO.java | 2 + .../catalog/files/gcs/GcsObjectIO.java | 2 + .../catalog/files/s3/S3ObjectIO.java | 2 + .../catalog/files/TestObjectIO.java | 136 ++++++++++++++++++ 6 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 catalog/files/impl/src/test/java/org/projectnessie/catalog/files/TestObjectIO.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 911a8adc5b2..8a7c5b1b9c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ as necessary. Empty sections will not end in the release notes. ### New Features +- Catalog: Iceberg table configurations overrides are now available in storage configuration settings. + Example: `nessie.catalog.service.s3.default-options.table-config-overrides.py-io-impl=pyiceberg.io.pyarrow.PyArrowFileIO` + ### Changes ### Deprecations diff --git a/catalog/files/api/src/main/java/org/projectnessie/catalog/files/config/BucketOptions.java b/catalog/files/api/src/main/java/org/projectnessie/catalog/files/config/BucketOptions.java index 4963e167b33..924b3aa990e 100644 --- a/catalog/files/api/src/main/java/org/projectnessie/catalog/files/config/BucketOptions.java +++ b/catalog/files/api/src/main/java/org/projectnessie/catalog/files/config/BucketOptions.java @@ -15,4 +15,10 @@ */ package org.projectnessie.catalog.files.config; -public interface BucketOptions {} +import java.util.Map; + +public interface BucketOptions { + + /** Iceberg table configuration overrides for all tables stored in this bucket. */ + Map tableConfigOverrides(); +} diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java index 6a3350be247..2e92c4f7f97 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java @@ -230,6 +230,8 @@ void icebergConfigOverrides( storageAccountShort.ifPresent( account -> config.accept(ADLS_SAS_TOKEN_PREFIX + account, sasToken)); }); + + fileSystemOptions.tableConfigOverrides().forEach(config); } void icebergConfigDefaults(BiConsumer config) { diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java index fb915b0eb91..7542bff6ebe 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java @@ -208,6 +208,8 @@ public void configureIcebergTable( GCS_OAUTH2_TOKEN_EXPIRES_AT, Long.toString(i.toEpochMilli()))); }); } + + bucketOptions.tableConfigOverrides().forEach(config); } @Override diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java index 16876a6e76e..ef6053296ae 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java @@ -214,6 +214,8 @@ public void configureIcebergTable( config.accept(S3_SECRET_ACCESS_KEY, s3credentials.secretAccessKey()); s3credentials.sessionToken().ifPresent(t -> config.accept(S3_SESSION_TOKEN, t)); } + + bucketOptions.tableConfigOverrides().forEach(config); } @Override diff --git a/catalog/files/impl/src/test/java/org/projectnessie/catalog/files/TestObjectIO.java b/catalog/files/impl/src/test/java/org/projectnessie/catalog/files/TestObjectIO.java new file mode 100644 index 00000000000..ce1051bd33d --- /dev/null +++ b/catalog/files/impl/src/test/java/org/projectnessie/catalog/files/TestObjectIO.java @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2025 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.catalog.files; + +import static org.projectnessie.catalog.files.api.ObjectIO.PYICEBERG_FILE_IO_IMPL; +import static org.projectnessie.catalog.secrets.UnsafePlainTextSecretsManager.unsafePlainTextSecretsProvider; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.projectnessie.catalog.files.adls.AdlsClientSupplier; +import org.projectnessie.catalog.files.adls.AdlsObjectIO; +import org.projectnessie.catalog.files.api.ObjectIO; +import org.projectnessie.catalog.files.api.StorageLocations; +import org.projectnessie.catalog.files.config.BucketOptions; +import org.projectnessie.catalog.files.config.ImmutableAdlsConfig; +import org.projectnessie.catalog.files.config.ImmutableAdlsFileSystemOptions; +import org.projectnessie.catalog.files.config.ImmutableAdlsOptions; +import org.projectnessie.catalog.files.config.ImmutableGcsBucketOptions; +import org.projectnessie.catalog.files.config.ImmutableGcsConfig; +import org.projectnessie.catalog.files.config.ImmutableGcsOptions; +import org.projectnessie.catalog.files.config.ImmutableS3BucketOptions; +import org.projectnessie.catalog.files.config.ImmutableS3Options; +import org.projectnessie.catalog.files.config.S3Options; +import org.projectnessie.catalog.files.gcs.GcsObjectIO; +import org.projectnessie.catalog.files.gcs.GcsStorageSupplier; +import org.projectnessie.catalog.files.s3.S3ClientSupplier; +import org.projectnessie.catalog.files.s3.S3ObjectIO; +import org.projectnessie.catalog.secrets.ResolvingSecretsProvider; +import org.projectnessie.catalog.secrets.SecretsProvider; +import org.projectnessie.storage.uri.StorageUri; + +@ExtendWith(SoftAssertionsExtension.class) +class TestObjectIO { + private static final SecretsProvider secretsProvider = + ResolvingSecretsProvider.builder() + .putSecretsManager("plain", unsafePlainTextSecretsProvider(Map.of())) + .build(); + + @InjectSoftAssertions protected SoftAssertions soft; + + private static S3ObjectIO s3(BucketOptions options) { + S3Options s3Options = + ImmutableS3Options.builder() + .defaultOptions(ImmutableS3BucketOptions.builder().from(options).build()) + .build(); + return new S3ObjectIO(new S3ClientSupplier(null, s3Options, null, secretsProvider), null); + } + + private static GcsObjectIO gcs(BucketOptions options) { + GcsStorageSupplier supplier = + new GcsStorageSupplier( + null, + ImmutableGcsConfig.builder().build(), + ImmutableGcsOptions.builder() + .defaultOptions(ImmutableGcsBucketOptions.builder().from(options).build()) + .build(), + secretsProvider); + return new GcsObjectIO(supplier); + } + + private static AdlsObjectIO adls(BucketOptions options) { + AdlsClientSupplier supplier = + new AdlsClientSupplier( + null, + ImmutableAdlsConfig.builder().build(), + ImmutableAdlsOptions.builder() + .defaultOptions(ImmutableAdlsFileSystemOptions.builder().from(options).build()) + .build(), + secretsProvider); + return new AdlsObjectIO(supplier); + } + + public static Stream tableConfigWithOverrides() { + BucketOptions defaults = ImmutableS3BucketOptions.builder().build(); + BucketOptions overrides = + ImmutableS3BucketOptions.builder() + .putTableConfigOverrides(PYICEBERG_FILE_IO_IMPL, "FileIOOverride1") + .putTableConfigOverrides("prop1", "test-value-1") + .build(); + return Stream.of( + Arguments.of( + s3(defaults), "s3://wh", PYICEBERG_FILE_IO_IMPL, "pyiceberg.io.fsspec.FsspecFileIO"), + Arguments.of(s3(overrides), "s3://wh", PYICEBERG_FILE_IO_IMPL, "FileIOOverride1"), + Arguments.of(s3(overrides), "s3://wh", "prop1", "test-value-1"), + Arguments.of( + gcs(defaults), "gs://wh", PYICEBERG_FILE_IO_IMPL, "pyiceberg.io.fsspec.FsspecFileIO"), + Arguments.of(gcs(overrides), "gs://wh", PYICEBERG_FILE_IO_IMPL, "FileIOOverride1"), + Arguments.of(gcs(overrides), "gs://wh", "prop1", "test-value-1"), + Arguments.of( + adls(defaults), + "abfss://wh", + PYICEBERG_FILE_IO_IMPL, + "pyiceberg.io.fsspec.FsspecFileIO"), + Arguments.of(adls(overrides), "abfss://wh", PYICEBERG_FILE_IO_IMPL, "FileIOOverride1"), + Arguments.of(adls(overrides), "abfss://wh", "prop1", "test-value-1")); + } + + @ParameterizedTest + @MethodSource + void tableConfigWithOverrides( + ObjectIO objectIO, String warehouseLocation, String propName, String expectedValue) { + StorageUri whUri = StorageUri.of(warehouseLocation); + StorageLocations storageLocations = + StorageLocations.storageLocations(whUri, List.of(whUri), List.of()); + Map config = new HashMap<>(); + + objectIO.configureIcebergTable(storageLocations, config::put, () -> false, false); + soft.assertThat(config).containsEntry(propName, expectedValue); + + config.clear(); + objectIO.configureIcebergTable(storageLocations, config::put, () -> true, true); + soft.assertThat(config).containsEntry(propName, expectedValue); + } +}