From 1a8e9b4554eb1f534aaec067fea28f35403fc1e4 Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 27 Dec 2023 10:15:03 +0800 Subject: [PATCH] [Feature][OssFile Connector] Make Oss implement source factory and sink factory (#6062) --- docs/en/connector-v2/sink/OssFile.md | 117 +++++++++++ docs/en/connector-v2/source/OssFile.md | 171 +++++++++++++++ .../file/config/BaseFileSourceConfig.java | 3 + .../BaseMultipleTableFinkSinkFactory.java | 92 +++++++++ .../source/reader/AbstractReadStrategy.java | 2 +- .../file/local/sink/LocalFileSinkFactory.java | 66 +----- .../{OssConf.java => OssHadoopConf.java} | 20 +- .../seatunnel/file/oss/sink/OssFileSink.java | 42 +--- .../file/oss/sink/OssFileSinkFactory.java | 18 +- .../file/oss/source/OssFileSource.java | 108 +--------- .../file/oss/source/OssFileSourceFactory.java | 25 ++- .../MultipleTableOssFileSourceConfig.java | 34 +++ .../source/config/OssFileSourceConfig.java | 46 +++++ seatunnel-dist/pom.xml | 6 - .../connector-file-oss-e2e/pom.xml | 70 +++++++ .../e2e/connector/file/oss/OssFileIT.java | 194 ++++++++++++++++++ .../file/oss/OssFileWithMultipleTableIT.java | 162 +++++++++++++++ .../e2e/connector/file/oss/OssUtils.java | 105 ++++++++++ .../src/test/resources/excel/e2e.xlsx | Bin 0 -> 5823 bytes .../resources/excel/fake_to_oss_excel.conf | 77 +++++++ .../excel/oss_excel_projection_to_assert.conf | 108 ++++++++++ .../resources/excel/oss_excel_to_assert.conf | 134 ++++++++++++ ...ss_excel_to_assert_with_multipletable.conf | 132 ++++++++++++ .../excel/oss_filter_excel_to_assert.conf | 135 ++++++++++++ .../src/test/resources/json/e2e.json | 5 + .../src/test/resources/json/e2e.json.lzo | Bin 0 -> 3466 bytes .../resources/json/fake_to_oss_file_json.conf | 83 ++++++++ .../json/oss_file_json_lzo_to_console.conf | 143 +++++++++++++ .../json/oss_file_json_to_assert.conf | 132 ++++++++++++ ...ile_json_to_assert_with_multipletable.conf | 128 ++++++++++++ .../resources/json/oss_file_to_console.conf | 45 ++++ 
.../src/test/resources/orc/e2e.orc | Bin 0 -> 5730 bytes .../resources/orc/fake_to_oss_file_orc.conf | 84 ++++++++ .../oss_file_orc_projection_to_assert.conf | 81 ++++++++ .../resources/orc/oss_file_orc_to_assert.conf | 80 ++++++++ ...file_orc_to_assert_with_multipletable.conf | 64 ++++++ .../src/test/resources/parquet/e2e.parquet | Bin 0 -> 9730 bytes .../parquet/fake_to_oss_file_parquet.conf | 84 ++++++++ ...oss_file_parquet_projection_to_assert.conf | 81 ++++++++ .../parquet/oss_file_parquet_to_assert.conf | 98 +++++++++ ..._parquet_to_assert_with_multipletable.conf | 64 ++++++ .../parquet/oss_file_to_console.conf | 41 ++++ .../src/test/resources/text/e2e.txt | 5 + .../src/test/resources/text/e2e.txt.lzo | Bin 0 -> 2720 bytes .../src/test/resources/text/e2e_delimiter.txt | 5 + .../test/resources/text/e2e_time_format.txt | 5 + .../resources/text/fake_to_oss_file_text.conf | 84 ++++++++ .../fake_to_oss_file_with_multiple_table.conf | 125 +++++++++++ .../text/oss_file_delimiter_assert.conf | 108 ++++++++++ .../text/oss_file_text_lzo_to_assert.conf | 142 +++++++++++++ .../oss_file_text_projection_to_assert.conf | 133 ++++++++++++ .../text/oss_file_text_skip_headers.conf | 133 ++++++++++++ .../text/oss_file_text_to_assert.conf | 132 ++++++++++++ ...ile_text_to_assert_with_multipletable.conf | 128 ++++++++++++ .../text/oss_file_time_format_assert.conf | 99 +++++++++ .../seatunnel-connector-v2-e2e/pom.xml | 1 + .../parse/MultipleTableJobConfigParser.java | 4 + .../seatunnel-engine-examples/pom.xml | 6 - 58 files changed, 3959 insertions(+), 231 deletions(-) create mode 100644 seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFinkSinkFactory.java rename seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/config/{OssConf.java => OssHadoopConf.java} (69%) create mode 100644 
seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/MultipleTableOssFileSourceConfig.java create mode 100644 seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/OssFileSourceConfig.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/pom.xml create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileIT.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileWithMultipleTableIT.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssUtils.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/e2e.xlsx create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/fake_to_oss_excel.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_filter_excel_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/e2e.json create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/e2e.json.lzo create mode 
100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/fake_to_oss_file_json.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_lzo_to_console.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_to_console.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/orc/e2e.orc create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/orc/fake_to_oss_file_orc.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/orc/oss_file_orc_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/orc/oss_file_orc_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/orc/oss_file_orc_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/e2e.parquet create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/fake_to_oss_file_parquet.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert.conf create mode 100644 
seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_to_console.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e.txt create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e.txt.lzo create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e_delimiter.txt create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e_time_format.txt create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/fake_to_oss_file_text.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/fake_to_oss_file_with_multiple_table.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/oss_file_delimiter_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/oss_file_text_lzo_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/oss_file_text_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/oss_file_text_skip_headers.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/oss_file_text_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/oss_file_text_to_assert_with_multipletable.conf create mode 100644 
seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/oss_file_time_format_assert.conf diff --git a/docs/en/connector-v2/sink/OssFile.md b/docs/en/connector-v2/sink/OssFile.md index 9a6b7197253..f9e817ba562 100644 --- a/docs/en/connector-v2/sink/OssFile.md +++ b/docs/en/connector-v2/sink/OssFile.md @@ -362,6 +362,123 @@ sink { } ``` +### Multiple Table + +To extract source metadata from upstream, you can use `${database_name}`, `${table_name}` and `${schema_name}` in the path. + +```bash + +env { + parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + tables_configs = [ + { + schema = { + table = "fake1" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + }, + { + schema = { + table = "fake2" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + 
c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } + ] + } +} + +sink { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/tmp/fake_empty/text/${table_name}" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "text" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + compress_codec = "lzo" + } +} + +``` + ## Changelog ### 2.2.0-beta 2022-09-26 diff --git a/docs/en/connector-v2/source/OssFile.md b/docs/en/connector-v2/source/OssFile.md index d87f36bfa03..233eb76800f 100644 --- a/docs/en/connector-v2/source/OssFile.md +++ b/docs/en/connector-v2/source/OssFile.md @@ -293,6 +293,177 @@ sink { } ``` +### Multiple Table + +For file types that do not need a configured schema, e.g. `orc`: + +``` +env { + parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + tables_configs = [ + { + schema = { + table = "fake01" + } + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/orc" + file_format_type = "orc" + }, + { + schema = { + table = "fake02" + } + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/orc" + file_format_type = "orc" + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + table-names = ["fake01", "fake02"] + } + } +} +``` + +For file types that need a configured schema, e.g. `json`: + +``` + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + 
spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + tables_configs = [ + { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + table = "fake01" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + }, + { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + table = "fake02" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + 
table-names = ["fake01", "fake02"] + } + } +} +``` + ## Changelog ### 2.2.0-beta 2022-09-26 diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java index f46c7352840..520d40f9be2 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java @@ -51,15 +51,18 @@ public abstract class BaseFileSourceConfig implements Serializable { private final FileFormat fileFormat; private final ReadStrategy readStrategy; private final List filePaths; + private final ReadonlyConfig baseFileSourceConfig; public abstract HadoopConf getHadoopConfig(); public abstract String getPluginName(); public BaseFileSourceConfig(ReadonlyConfig readonlyConfig) { + this.baseFileSourceConfig = readonlyConfig; this.fileFormat = readonlyConfig.get(BaseSourceConfigOptions.FILE_FORMAT_TYPE); this.readStrategy = ReadStrategyFactory.of(readonlyConfig, getHadoopConfig()); this.filePaths = parseFilePaths(readonlyConfig); + this.catalogTable = parseCatalogTable(readonlyConfig); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFinkSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFinkSinkFactory.java new file mode 100644 index 00000000000..b8c60f0e197 --- /dev/null +++ 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFinkSinkFactory.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.factory; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.shade.com.typesafe.config.ConfigValueFactory; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.SinkReplaceNameConstant; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.factory.TableSinkFactory; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileAggregatedCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.file.sink.state.FileSinkState; + +public abstract class BaseMultipleTableFinkSinkFactory + implements 
TableSinkFactory< + SeaTunnelRow, FileSinkState, FileCommitInfo, FileAggregatedCommitInfo> { + + // replace the table name in sink config's path + public ReadonlyConfig generateCurrentReadonlyConfig( + ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { + // Copy the config to avoid modifying the original config + Config config = readonlyConfig.toConfig(); + + if (config.hasPath(BaseSinkConfig.FILE_PATH.key())) { + String replacedPath = + replaceCatalogTableInPath( + config.getString(BaseSinkConfig.FILE_PATH.key()), catalogTable); + config = + config.withValue( + BaseSinkConfig.FILE_PATH.key(), + ConfigValueFactory.fromAnyRef(replacedPath)); + } + + if (config.hasPath(BaseSinkConfig.TMP_PATH.key())) { + String replacedPath = + replaceCatalogTableInPath( + config.getString(BaseSinkConfig.TMP_PATH.key()), catalogTable); + config = + config.withValue( + BaseSinkConfig.TMP_PATH.key(), + ConfigValueFactory.fromAnyRef(replacedPath)); + } + + return ReadonlyConfig.fromConfig(config); + } + + public String replaceCatalogTableInPath(String originString, CatalogTable catalogTable) { + String path = originString; + TableIdentifier tableIdentifier = catalogTable.getTableId(); + if (tableIdentifier != null) { + if (tableIdentifier.getDatabaseName() != null) { + path = + path.replace( + SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY, + tableIdentifier.getDatabaseName()); + } + if (tableIdentifier.getSchemaName() != null) { + path = + path.replace( + SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY, + tableIdentifier.getSchemaName()); + } + if (tableIdentifier.getTableName() != null) { + path = + path.replace( + SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY, + tableIdentifier.getTableName()); + } + } + return path; + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java 
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java index dc731879aeb..071414c9da3 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java @@ -97,7 +97,7 @@ public List getFileNamesByPath(String path) throws IOException { fileNames.addAll(getFileNamesByPath(fileStatus.getPath().toString())); continue; } - if (fileStatus.isFile() && filterFileByPattern(fileStatus)) { + if (fileStatus.isFile() && filterFileByPattern(fileStatus) && fileStatus.getLen() > 0) { // filter '_SUCCESS' file if (!fileStatus.getPath().getName().equals("_SUCCESS") && !fileStatus.getPath().getName().startsWith(".")) { diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java index d9232f4ddc5..f65a93f9095 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java @@ -17,22 +17,17 @@ package org.apache.seatunnel.connectors.seatunnel.file.local.sink; -import org.apache.seatunnel.shade.com.typesafe.config.Config; -import org.apache.seatunnel.shade.com.typesafe.config.ConfigValueFactory; - import org.apache.seatunnel.api.configuration.ReadonlyConfig; import 
org.apache.seatunnel.api.configuration.util.OptionRule; -import org.apache.seatunnel.api.sink.SinkReplaceNameConstant; import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; -import org.apache.seatunnel.api.table.factory.TableSinkFactory; import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSinkConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; +import org.apache.seatunnel.connectors.seatunnel.file.factory.BaseMultipleTableFinkSinkFactory; import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileAggregatedCommitInfo; import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileCommitInfo; import org.apache.seatunnel.connectors.seatunnel.file.sink.state.FileSinkState; @@ -40,9 +35,7 @@ import com.google.auto.service.AutoService; @AutoService(Factory.class) -public class LocalFileSinkFactory - implements TableSinkFactory< - SeaTunnelRow, FileSinkState, FileCommitInfo, FileAggregatedCommitInfo> { +public class LocalFileSinkFactory extends BaseMultipleTableFinkSinkFactory { @Override public String factoryIdentifier() { return FileSystemType.LOCAL.getFileSystemPluginName(); @@ -108,59 +101,4 @@ public OptionRule optionRule() { generateCurrentReadonlyConfig(readonlyConfig, catalogTable); return () -> new LocalFileSink(finalReadonlyConfig, catalogTable); } - - // replace the table name in sink config's path - private ReadonlyConfig generateCurrentReadonlyConfig( - ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { - // Copy the config to avoid modifying the original config - Config config = readonlyConfig.toConfig(); - 
- if (config.hasPath(BaseSinkConfig.FILE_PATH.key())) { - String replacedPath = - replaceCatalogTableInPath( - config.getString(BaseSinkConfig.FILE_PATH.key()), catalogTable); - config = - config.withValue( - BaseSinkConfig.FILE_PATH.key(), - ConfigValueFactory.fromAnyRef(replacedPath)); - } - - if (config.hasPath(BaseSinkConfig.TMP_PATH.key())) { - String replacedPath = - replaceCatalogTableInPath( - config.getString(BaseSinkConfig.TMP_PATH.key()), catalogTable); - config = - config.withValue( - BaseSinkConfig.TMP_PATH.key(), - ConfigValueFactory.fromAnyRef(replacedPath)); - } - - return ReadonlyConfig.fromConfig(config); - } - - private String replaceCatalogTableInPath(String originString, CatalogTable catalogTable) { - String path = originString; - TableIdentifier tableIdentifier = catalogTable.getTableId(); - if (tableIdentifier != null) { - if (tableIdentifier.getDatabaseName() != null) { - path = - path.replace( - SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY, - tableIdentifier.getDatabaseName()); - } - if (tableIdentifier.getSchemaName() != null) { - path = - path.replace( - SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY, - tableIdentifier.getSchemaName()); - } - if (tableIdentifier.getTableName() != null) { - path = - path.replace( - SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY, - tableIdentifier.getTableName()); - } - } - return path; - } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/config/OssConf.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/config/OssHadoopConf.java similarity index 69% rename from seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/config/OssConf.java rename to 
seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/config/OssHadoopConf.java index 730af9bfdbb..7f4c9eb8098 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/config/OssConf.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/config/OssHadoopConf.java @@ -17,15 +17,14 @@ package org.apache.seatunnel.connectors.seatunnel.file.oss.config; -import org.apache.seatunnel.shade.com.typesafe.config.Config; - +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; import org.apache.hadoop.fs.aliyun.oss.Constants; import java.util.HashMap; -public class OssConf extends HadoopConf { +public class OssHadoopConf extends HadoopConf { private static final String HDFS_IMPL = "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"; private static final String SCHEMA = "oss"; @@ -39,19 +38,16 @@ public String getSchema() { return SCHEMA; } - public OssConf(String hdfsNameKey) { + public OssHadoopConf(String hdfsNameKey) { super(hdfsNameKey); } - public static HadoopConf buildWithConfig(Config config) { - HadoopConf hadoopConf = new OssConf(config.getString(OssConfigOptions.BUCKET.key())); + public static HadoopConf buildWithConfig(ReadonlyConfig config) { + HadoopConf hadoopConf = new OssHadoopConf(config.get(OssConfigOptions.BUCKET)); HashMap ossOptions = new HashMap<>(); - ossOptions.put( - Constants.ACCESS_KEY_ID, config.getString(OssConfigOptions.ACCESS_KEY.key())); - ossOptions.put( - Constants.ACCESS_KEY_SECRET, - config.getString(OssConfigOptions.ACCESS_SECRET.key())); - ossOptions.put(Constants.ENDPOINT_KEY, config.getString(OssConfigOptions.ENDPOINT.key())); + ossOptions.put(Constants.ACCESS_KEY_ID, config.get(OssConfigOptions.ACCESS_KEY)); + 
ossOptions.put(Constants.ACCESS_KEY_SECRET, config.get(OssConfigOptions.ACCESS_SECRET)); + ossOptions.put(Constants.ENDPOINT_KEY, config.get(OssConfigOptions.ENDPOINT)); hadoopConf.setExtraOptions(ossOptions); return hadoopConf; } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSink.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSink.java index 4b32e93c397..5cab55f8edd 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSink.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSink.java @@ -17,47 +17,23 @@ package org.apache.seatunnel.connectors.seatunnel.file.oss.sink; -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.sink.SeaTunnelSink; -import org.apache.seatunnel.common.config.CheckConfigUtil; -import org.apache.seatunnel.common.config.CheckResult; -import org.apache.seatunnel.common.constants.PluginType; +import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; -import org.apache.seatunnel.connectors.seatunnel.file.oss.config.OssConf; -import org.apache.seatunnel.connectors.seatunnel.file.oss.config.OssConfigOptions; -import org.apache.seatunnel.connectors.seatunnel.file.sink.BaseFileSink; +import org.apache.seatunnel.connectors.seatunnel.file.oss.config.OssHadoopConf; +import 
org.apache.seatunnel.connectors.seatunnel.file.sink.BaseMultipleTableFileSink; import com.google.auto.service.AutoService; @AutoService(SeaTunnelSink.class) -public class OssFileSink extends BaseFileSink { - @Override - public String getPluginName() { - return FileSystemType.OSS.getFileSystemPluginName(); +public class OssFileSink extends BaseMultipleTableFileSink { + public OssFileSink(ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { + super(OssHadoopConf.buildWithConfig(readonlyConfig), readonlyConfig, catalogTable); } @Override - public void prepare(Config pluginConfig) throws PrepareFailException { - super.prepare(pluginConfig); - CheckResult result = - CheckConfigUtil.checkAllExists( - pluginConfig, - OssConfigOptions.FILE_PATH.key(), - OssConfigOptions.ENDPOINT.key(), - OssConfigOptions.ACCESS_KEY.key(), - OssConfigOptions.ACCESS_SECRET.key(), - OssConfigOptions.BUCKET.key()); - if (!result.isSuccess()) { - throw new FileConnectorException( - SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, - String.format( - "PluginName: %s, PluginType: %s, Message: %s", - getPluginName(), PluginType.SINK, result.getMsg())); - } - hadoopConf = OssConf.buildWithConfig(pluginConfig); + public String getPluginName() { + return FileSystemType.OSS.getFileSystemPluginName(); } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java index 6f168924c8e..49b5ff8bfa4 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java @@ -17,23 +17,37 @@ package 
org.apache.seatunnel.connectors.seatunnel.file.oss.sink; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; -import org.apache.seatunnel.api.table.factory.TableSinkFactory; +import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext; import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSinkConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; +import org.apache.seatunnel.connectors.seatunnel.file.factory.BaseMultipleTableFinkSinkFactory; import org.apache.seatunnel.connectors.seatunnel.file.oss.config.OssConfigOptions; import com.google.auto.service.AutoService; @AutoService(Factory.class) -public class OssFileSinkFactory implements TableSinkFactory { +public class OssFileSinkFactory extends BaseMultipleTableFinkSinkFactory { @Override public String factoryIdentifier() { return FileSystemType.OSS.getFileSystemPluginName(); } + @Override + public TableSink createSink(TableSinkFactoryContext context) { + ReadonlyConfig readonlyConfig = context.getOptions(); + CatalogTable catalogTable = context.getCatalogTable(); + + ReadonlyConfig finalReadonlyConfig = + generateCurrentReadonlyConfig(readonlyConfig, catalogTable); + return () -> new OssFileSink(finalReadonlyConfig, catalogTable); + } + @Override public OptionRule optionRule() { return OptionRule.builder() diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java index 8a88f0a2ffe..d7222eed6dd 
100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java @@ -17,113 +17,19 @@ package org.apache.seatunnel.connectors.seatunnel.file.oss.source; -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; -import org.apache.seatunnel.api.source.SeaTunnelSource; -import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; -import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.config.CheckConfigUtil; -import org.apache.seatunnel.common.config.CheckResult; -import org.apache.seatunnel.common.constants.PluginType; -import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; -import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorErrorCode; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; -import org.apache.seatunnel.connectors.seatunnel.file.oss.config.OssConf; -import org.apache.seatunnel.connectors.seatunnel.file.oss.config.OssConfigOptions; -import org.apache.seatunnel.connectors.seatunnel.file.source.BaseFileSource; -import org.apache.seatunnel.connectors.seatunnel.file.source.reader.ReadStrategyFactory; +import org.apache.seatunnel.connectors.seatunnel.file.oss.source.config.MultipleTableOssFileSourceConfig; +import 
org.apache.seatunnel.connectors.seatunnel.file.source.BaseMultipleTableFileSource; -import com.google.auto.service.AutoService; +public class OssFileSource extends BaseMultipleTableFileSource { -import java.io.IOException; + public OssFileSource(ReadonlyConfig readonlyConfig) { + super(new MultipleTableOssFileSourceConfig(readonlyConfig)); + } -@AutoService(SeaTunnelSource.class) -public class OssFileSource extends BaseFileSource { @Override public String getPluginName() { return FileSystemType.OSS.getFileSystemPluginName(); } - - @Override - public void prepare(Config pluginConfig) throws PrepareFailException { - CheckResult result = - CheckConfigUtil.checkAllExists( - pluginConfig, - OssConfigOptions.FILE_PATH.key(), - OssConfigOptions.FILE_FORMAT_TYPE.key(), - OssConfigOptions.ENDPOINT.key(), - OssConfigOptions.ACCESS_KEY.key(), - OssConfigOptions.ACCESS_SECRET.key(), - OssConfigOptions.BUCKET.key()); - if (!result.isSuccess()) { - throw new FileConnectorException( - SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, - String.format( - "PluginName: %s, PluginType: %s, Message: %s", - getPluginName(), PluginType.SOURCE, result.getMsg())); - } - String path = pluginConfig.getString(OssConfigOptions.FILE_PATH.key()); - hadoopConf = OssConf.buildWithConfig(pluginConfig); - readStrategy = - ReadStrategyFactory.of( - pluginConfig.getString(OssConfigOptions.FILE_FORMAT_TYPE.key())); - readStrategy.setPluginConfig(pluginConfig); - readStrategy.init(hadoopConf); - try { - filePaths = readStrategy.getFileNamesByPath(path); - } catch (IOException e) { - String errorMsg = String.format("Get file list from this path [%s] failed", path); - throw new FileConnectorException( - FileConnectorErrorCode.FILE_LIST_GET_FAILED, errorMsg, e); - } - // support user-defined schema - FileFormat fileFormat = - FileFormat.valueOf( - pluginConfig - .getString(OssConfigOptions.FILE_FORMAT_TYPE.key()) - .toUpperCase()); - // only json text csv type support user-defined schema now - if 
(pluginConfig.hasPath(TableSchemaOptions.SCHEMA.key())) { - switch (fileFormat) { - case CSV: - case TEXT: - case JSON: - case EXCEL: - SeaTunnelRowType userDefinedSchema = - CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); - readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); - rowType = readStrategy.getActualSeaTunnelRowTypeInfo(); - break; - case ORC: - case PARQUET: - throw new FileConnectorException( - CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION, - "SeaTunnel does not support user-defined schema for [parquet, orc] files"); - default: - // never got in there - throw new FileConnectorException( - CommonErrorCodeDeprecated.ILLEGAL_ARGUMENT, - "SeaTunnel does not supported this file format"); - } - } else { - if (filePaths.isEmpty()) { - // When the directory is empty, distribute default behavior schema - rowType = CatalogTableUtil.buildSimpleTextSchema(); - return; - } - try { - rowType = readStrategy.getSeaTunnelRowTypeInfo(filePaths.get(0)); - } catch (FileConnectorException e) { - String errorMsg = - String.format("Get table schema from file [%s] failed", filePaths.get(0)); - throw new FileConnectorException( - CommonErrorCodeDeprecated.TABLE_SCHEMA_GET_FAILED, errorMsg, e); - } - } - } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java index 70395abf3bd..b332d99d472 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java @@ -19,9 +19,12 @@ import 
org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions; +import org.apache.seatunnel.api.table.connector.TableSource; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSourceFactory; +import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext; import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSourceConfigOptions; import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; @@ -29,6 +32,7 @@ import com.google.auto.service.AutoService; +import java.io.Serializable; import java.util.Arrays; @AutoService(Factory.class) @@ -38,15 +42,24 @@ public String factoryIdentifier() { return FileSystemType.OSS.getFileSystemPluginName(); } + @Override + public + TableSource createSource(TableSourceFactoryContext context) { + return () -> (SeaTunnelSource) new OssFileSource(context.getOptions()); + } + @Override public OptionRule optionRule() { return OptionRule.builder() - .required(OssConfigOptions.FILE_PATH) - .required(OssConfigOptions.BUCKET) - .required(OssConfigOptions.ACCESS_KEY) - .required(OssConfigOptions.ACCESS_SECRET) - .required(OssConfigOptions.ENDPOINT) - .required(BaseSourceConfigOptions.FILE_FORMAT_TYPE) + .optional( + org.apache.seatunnel.connectors.seatunnel.file.config + .BaseSourceConfigOptions.TABLE_CONFIGS) + .optional(OssConfigOptions.FILE_PATH) + .optional(OssConfigOptions.BUCKET) + .optional(OssConfigOptions.ACCESS_KEY) + .optional(OssConfigOptions.ACCESS_SECRET) + .optional(OssConfigOptions.ENDPOINT) + .optional(BaseSourceConfigOptions.FILE_FORMAT_TYPE) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/MultipleTableOssFileSourceConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/MultipleTableOssFileSourceConfig.java new file mode 100644 index 00000000000..caa1b4ca70d --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/MultipleTableOssFileSourceConfig.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.oss.source.config; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseFileSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseMultipleTableFileSourceConfig; + +public class MultipleTableOssFileSourceConfig extends BaseMultipleTableFileSourceConfig { + + public MultipleTableOssFileSourceConfig(ReadonlyConfig ossFileSourceRootConfig) { + super(ossFileSourceRootConfig); + } + + @Override + public BaseFileSourceConfig getBaseSourceConfig(ReadonlyConfig readonlyConfig) { + return new OssFileSourceConfig(readonlyConfig); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/OssFileSourceConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/OssFileSourceConfig.java new file mode 100644 index 00000000000..16c3f0fa799 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/config/OssFileSourceConfig.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.oss.source.config; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseFileSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; +import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; +import org.apache.seatunnel.connectors.seatunnel.file.oss.config.OssHadoopConf; + +import lombok.Getter; + +@Getter +public class OssFileSourceConfig extends BaseFileSourceConfig { + + private static final long serialVersionUID = 1L; + + @Override + public HadoopConf getHadoopConfig() { + return OssHadoopConf.buildWithConfig(getBaseFileSourceConfig()); + } + + @Override + public String getPluginName() { + return FileSystemType.OSS.getFileSystemPluginName(); + } + + public OssFileSourceConfig(ReadonlyConfig readonlyConfig) { + super(readonlyConfig); + } +} diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml index dac8d6978bd..a97cd99aa48 100644 --- a/seatunnel-dist/pom.xml +++ b/seatunnel-dist/pom.xml @@ -636,12 +636,6 @@ - - org.apache.hadoop - hadoop-aliyun - ${hadoop-aliyun.version} - provided - org.apache.hadoop hadoop-aws diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/pom.xml new file mode 100644 index 00000000000..346fed24a3d --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/pom.xml @@ -0,0 +1,70 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connector-v2-e2e + ${revision} + + + connector-file-oss-e2e + + 3.4.1 + + + + + org.apache.seatunnel + connector-fake + ${project.version} + test + + + org.apache.seatunnel + connector-file-oss + ${project.version} + test + + + com.aliyun.oss + aliyun-sdk-oss + ${aliyun.sdk.oss.version} + 
test + + + org.apache.seatunnel + seatunnel-hadoop3-3.1.4-uber + ${project.version} + optional + test + + + org.apache.avro + avro + + + + + org.apache.seatunnel + connector-assert + ${project.version} + test + + + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileIT.java new file mode 100644 index 00000000000..6ff7ae9c1e8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileIT.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.connector.file.oss; + +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.container.TestHelper; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; +import org.apache.seatunnel.e2e.common.util.ContainerUtil; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; + +import io.airlift.compress.lzo.LzopCodec; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +@Disabled("Disabled because it needs user's personal oss account to run this test") +public class OssFileIT extends TestSuiteBase { + + public static final String OSS_SDK_DOWNLOAD = + "https://repo1.maven.org/maven2/com/aliyun/oss/aliyun-sdk-oss/3.4.1/aliyun-sdk-oss-3.4.1.jar"; + public static final String JDOM_DOWNLOAD = + "https://repo1.maven.org/maven2/org/jdom/jdom/1.1/jdom-1.1.jar"; + public static final String HADOOP_ALIYUN_DOWNLOAD = + "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aliyun/3.1.4/hadoop-aliyun-3.1.4.jar"; + + @TestContainerExtension + private final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/oss/lib && cd /tmp/seatunnel/plugins/oss/lib && curl -O " + + OSS_SDK_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/plugins/oss/lib && curl -O " + JDOM_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + 
"bash", + "-c", + "cd /tmp/seatunnel/plugins/oss/lib && curl -O " + + HADOOP_ALIYUN_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/lib && curl -O " + OSS_SDK_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", "-c", "cd /tmp/seatunnel/lib && curl -O " + JDOM_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/lib && curl -O " + HADOOP_ALIYUN_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + }; + + /** Copy data files to oss */ + @TestTemplate + public void testOssFileReadAndWrite(TestContainer container) + throws IOException, InterruptedException { + // Copy test files to OSS + OssUtils ossUtils = new OssUtils(); + try { + ossUtils.uploadTestFiles( + "/json/e2e.json", + "test/seatunnel/read/json/name=tyrantlucifer/hobby=coding/e2e.json", + true); + Path jsonLzo = convertToLzoFile(ContainerUtil.getResourcesFile("/json/e2e.json")); + ossUtils.uploadTestFiles( + jsonLzo.toString(), "test/seatunnel/read/lzo_json/e2e.json", false); + ossUtils.uploadTestFiles( + "/text/e2e.txt", + "test/seatunnel/read/text/name=tyrantlucifer/hobby=coding/e2e.txt", + true); + ossUtils.uploadTestFiles( + "/text/e2e_delimiter.txt", "test/seatunnel/read/text_delimiter/e2e.txt", true); + ossUtils.uploadTestFiles( + "/text/e2e_time_format.txt", + "test/seatunnel/read/text_time_format/e2e.txt", + true); + Path txtLzo = convertToLzoFile(ContainerUtil.getResourcesFile("/text/e2e.txt")); + ossUtils.uploadTestFiles( + txtLzo.toString(), "test/seatunnel/read/lzo_text/e2e.txt", false); + ossUtils.uploadTestFiles( + "/excel/e2e.xlsx", + "test/seatunnel/read/excel/name=tyrantlucifer/hobby=coding/e2e.xlsx", + true); + ossUtils.uploadTestFiles( + "/orc/e2e.orc", + 
"test/seatunnel/read/orc/name=tyrantlucifer/hobby=coding/e2e.orc", + true); + ossUtils.uploadTestFiles( + "/parquet/e2e.parquet", + "test/seatunnel/read/parquet/name=tyrantlucifer/hobby=coding/e2e.parquet", + true); + ossUtils.uploadTestFiles( + "/excel/e2e.xlsx", + "test/seatunnel/read/excel_filter/name=tyrantlucifer/hobby=coding/e2e_filter.xlsx", + true); + ossUtils.createDir("tmp/fake_empty"); + } finally { + ossUtils.close(); + } + + TestHelper helper = new TestHelper(container); + + helper.execute("/excel/fake_to_oss_excel.conf"); + helper.execute("/excel/oss_excel_to_assert.conf"); + helper.execute("/excel/oss_excel_projection_to_assert.conf"); + // test write oss text file + helper.execute("/text/fake_to_oss_file_text.conf"); + helper.execute("/text/oss_file_text_lzo_to_assert.conf"); + helper.execute("/text/oss_file_delimiter_assert.conf"); + helper.execute("/text/oss_file_time_format_assert.conf"); + // test read skip header + helper.execute("/text/oss_file_text_skip_headers.conf"); + // test read oss text file + helper.execute("/text/oss_file_text_to_assert.conf"); + // test read oss text file with projection + helper.execute("/text/oss_file_text_projection_to_assert.conf"); + // test write oss json file + helper.execute("/json/fake_to_oss_file_json.conf"); + // test read oss json file + helper.execute("/json/oss_file_json_to_assert.conf"); + helper.execute("/json/oss_file_json_lzo_to_console.conf"); + // test write oss orc file + helper.execute("/orc/fake_to_oss_file_orc.conf"); + // test read oss orc file + helper.execute("/orc/oss_file_orc_to_assert.conf"); + // test read oss orc file with projection + helper.execute("/orc/oss_file_orc_projection_to_assert.conf"); + // test write oss parquet file + helper.execute("/parquet/fake_to_oss_file_parquet.conf"); + // test read oss parquet file + helper.execute("/parquet/oss_file_parquet_to_assert.conf"); + // test read oss parquet file with projection + 
helper.execute("/parquet/oss_file_parquet_projection_to_assert.conf"); + // test read filtered oss file + helper.execute("/excel/oss_filter_excel_to_assert.conf"); + + // test read empty directory + helper.execute("/json/oss_file_to_console.conf"); + helper.execute("/parquet/oss_file_to_console.conf"); + } + + private Path convertToLzoFile(File file) throws IOException { + LzopCodec lzo = new LzopCodec(); + Path path = Paths.get(file.getAbsolutePath() + ".lzo"); + OutputStream outputStream = lzo.createOutputStream(Files.newOutputStream(path)); + outputStream.write(Files.readAllBytes(file.toPath())); + outputStream.close(); + return path; + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileWithMultipleTableIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileWithMultipleTableIT.java new file mode 100644 index 00000000000..39b97ce3ec8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssFileWithMultipleTableIT.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.e2e.connector.file.oss; + +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.container.TestHelper; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; + +import java.io.IOException; + +@Disabled("Disabled because it needs user's personal oss account to run this test") +public class OssFileWithMultipleTableIT extends TestSuiteBase { + + public static final String OSS_SDK_DOWNLOAD = + "https://repo1.maven.org/maven2/com/aliyun/oss/aliyun-sdk-oss/3.4.1/aliyun-sdk-oss-3.4.1.jar"; + public static final String JDOM_DOWNLOAD = + "https://repo1.maven.org/maven2/org/jdom/jdom/1.1/jdom-1.1.jar"; + public static final String HADOOP_ALIYUN_DOWNLOAD = + "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aliyun/3.1.4/hadoop-aliyun-3.1.4.jar"; + + @TestContainerExtension + private final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/oss/lib && cd /tmp/seatunnel/plugins/oss/lib && curl -O " + + OSS_SDK_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/plugins/oss/lib && curl -O " + JDOM_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/plugins/oss/lib && curl -O " + + HADOOP_ALIYUN_DOWNLOAD); + 
Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/lib && curl -O " + OSS_SDK_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", "-c", "cd /tmp/seatunnel/lib && curl -O " + JDOM_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/lib && curl -O " + HADOOP_ALIYUN_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + }; + + /** Copy data files to oss */ + @TestTemplate + public void addTestFiles(TestContainer container) throws IOException, InterruptedException { + // Copy test files to OSS + OssUtils ossUtils = new OssUtils(); + try { + ossUtils.uploadTestFiles( + "/json/e2e.json", + "test/seatunnel/read/json/name=tyrantlucifer/hobby=coding/e2e.json", + true); + ossUtils.uploadTestFiles( + "/text/e2e.txt", + "test/seatunnel/read/text/name=tyrantlucifer/hobby=coding/e2e.txt", + true); + ossUtils.uploadTestFiles( + "/excel/e2e.xlsx", + "test/seatunnel/read/excel/name=tyrantlucifer/hobby=coding/e2e.xlsx", + true); + ossUtils.uploadTestFiles( + "/orc/e2e.orc", + "test/seatunnel/read/orc/name=tyrantlucifer/hobby=coding/e2e.orc", + true); + ossUtils.uploadTestFiles( + "/parquet/e2e.parquet", + "test/seatunnel/read/parquet/name=tyrantlucifer/hobby=coding/e2e.parquet", + true); + ossUtils.createDir("tmp/fake_empty"); + } finally { + ossUtils.close(); + } + } + + @TestTemplate + public void testFakeToOssFileInMultipleTableMode_text(TestContainer testContainer) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(testContainer); + helper.execute("/text/fake_to_oss_file_with_multiple_table.conf"); + } + + @TestTemplate + public void testOssFileReadAndWriteInMultipleTableMode_excel(TestContainer container) + throws IOException, InterruptedException { + 
TestHelper helper = new TestHelper(container); + helper.execute("/excel/oss_excel_to_assert_with_multipletable.conf"); + } + + @TestTemplate + public void testOssFileReadAndWriteInMultipleTableMode_json(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/json/oss_file_json_to_assert_with_multipletable.conf"); + } + + @TestTemplate + public void testOssFileReadAndWriteInMultipleTableMode_orc(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/orc/oss_file_orc_to_assert_with_multipletable.conf"); + } + + @TestTemplate + public void testOssFileReadAndWriteInMultipleTableMode_parquet(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/parquet/oss_file_parquet_to_assert_with_multipletable.conf"); + } + + @TestTemplate + public void testOssFileReadAndWriteInMultipleTableMode_text(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/text/oss_file_text_to_assert_with_multipletable.conf"); + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssUtils.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssUtils.java new file mode 100644 index 00000000000..0d1b90534f7 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/oss/OssUtils.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.e2e.connector.file.oss; + +import org.apache.seatunnel.e2e.common.util.ContainerUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.aliyun.oss.ClientException; +import com.aliyun.oss.OSS; +import com.aliyun.oss.OSSClientBuilder; +import com.aliyun.oss.OSSException; +import com.aliyun.oss.model.PutObjectResult; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; + +public class OssUtils { + private static Logger logger = LoggerFactory.getLogger(OssUtils.class); + private OSS ossClient = null; + private String endpoint = "https://oss-accelerate.aliyuncs.com"; + private String accessKeyId = "xxxxxxxxxxxxxxxxxxx"; + private String accessKeySecret = "xxxxxxxxxxxxxxxxxxx"; + private String bucket = "whale-ops"; + + public OssUtils() { + OSSClientBuilder ossClientBuilder = new OSSClientBuilder(); + ossClient = ossClientBuilder.build(endpoint, accessKeyId, accessKeySecret); + } + + public void uploadTestFiles( + String filePath, String targetFilePath, boolean isFindFromResource) { + try { + File resourcesFile = null; + if (isFindFromResource) { + resourcesFile = ContainerUtil.getResourcesFile(filePath); + } else { + resourcesFile = new File(filePath); + } + FileInputStream fileInputStream = new FileInputStream(resourcesFile); + PutObjectResult result = 
ossClient.putObject(bucket, targetFilePath, fileInputStream); + } catch (OSSException oe) { + logger.error( + "Caught an OSSException, which means your request made it to OSS, " + + "but was rejected with an error response for some reason."); + logger.error("Error Message:" + oe.getErrorMessage()); + logger.error("Error Code:" + oe.getErrorCode()); + logger.error("Request ID:" + oe.getRequestId()); + logger.error("Host ID:" + oe.getHostId()); + } catch (ClientException ce) { + logger.error( + "Caught an ClientException, which means the client encountered " + + "a serious internal problem while trying to communicate with OSS, " + + "such as not being able to access the network."); + logger.error("Error Message:" + ce.getMessage()); + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } + } + + public void createDir(String dir) { + try { + PutObjectResult result = + ossClient.putObject(bucket, dir, new ByteArrayInputStream("".getBytes())); + } catch (OSSException oe) { + logger.error( + "Caught an OSSException, which means your request made it to OSS, " + + "but was rejected with an error response for some reason."); + logger.error("Error Message:" + oe.getErrorMessage()); + logger.error("Error Code:" + oe.getErrorCode()); + logger.error("Request ID:" + oe.getRequestId()); + logger.error("Host ID:" + oe.getHostId()); + } catch (ClientException ce) { + logger.error( + "Caught an ClientException, which means the client encountered " + + "a serious internal problem while trying to communicate with OSS, " + + "such as not being able to access the network."); + logger.error("Error Message:" + ce.getMessage()); + } + } + + public void close() { + if (ossClient != null) { + ossClient.shutdown(); + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/e2e.xlsx b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/e2e.xlsx new file mode 100644 index 
0000000000000000000000000000000000000000..87d363d7db3bbe23b416ef1297979146b04423ea GIT binary patch literal 5823 zcmaJ_bzGF~wxyc^1f)S41PSSu?nYve5{8DMTRKFBPDyD=k(LfA0V(NbC~093kh;V7 zoqObZ&-wPuJ2QX0d;gxjo;~kc&(csvMj=ALz`#J@seG=Ba4!hqzYV3qj&2}FHxnIi zCy=WVhnItWo%Wz(7Z?6ja{u&WewfB{;vi|7zK10+#lW;tI-J-~RM<&UZRRq5we=JL*)9UEcSJ@C%X7$l8$-|S-gEj>L zm0s)UgtuCeb<^^Br^a#2RmkYRgg?SD%6*pNzEo03#Ygf`#$XS=-C45c7_dIqs~xSE z3j)|~ybqPuoP4*N_ByRP=+)OxDi_a%9}vl86TS&9xL{n0`1n3mYE&8~JM1*3i zCP#LDko!b%p7rd-4274tSoy&-;?8ZSH3c-fZvTc?d6hEg@A z6AM0+GKc=M{jG~Tk}~5-!5GG`N_>SOwaXz?h~%Mb#>_-rbBvWL>;MQK_aT z)RC*Byz(8<@p6=9=cq~*zex40a7oY?<+bBpHZBugoniR{ej(qrv+CzL6Axc(1N{2$ zTTO7k9dob?=&m1!xOt_|Ttu<~h3lU%b&9_*i=6}gwc zYh#?Ex(1HZzolg$?!fbKIFjmc*b$UHQ~IYZasfYpU|Pds3a8BCAc7D zJ{E@f>H*&zkD1+ggeh4diP+8$Fpu zO(+sBee2YZ{DlEa|GesB{Go-3u~CEJIFYwzk$3#V+CoN=Po^8QuBh#sR;$yqjhjy& zj+<9Ex4->#`YBj~Pax+G!**h^DpdI2r3!xicMOT(7<$=rx>}pLfGo7#Tx=YzT<>6X zPqovgQzQ&)Z*LD?;d!~pSqlx14t^7i!P3P2LoNiD*X0oHbL91*Z-TQY9U8x7@V2%a zv4MelHv7oUd77F_hF3Dv$!votXSE+k_URUQ!2bNnvW=eFi<{V!+pI#Hl--f(nmt3~ z)#OulgG%V}2gLHH%IzES2K6ji*+tiyqS51Pegq>&!=`QNa%AhaUg;dV&ajl2HNgxu zHB|FWH*&_Bs|$nl%pB0x^{sk-&#aAyzlRd?Esk8-^I8!~wHjh1G0MZ?twh0eD*t%K z8l$I!G3C6`Oo{o+lvI0+-TBav0a4+nSNtYtOh4j59tN4z-eO;3TCN_R5z2VmYflIc z=Y5ZFso0!fMh{r07y%IgTz z4OV7dKU>x_Ff!iD3n{F4vMwkPmU#kO+3uzpx{jv-bU6p(Lv0o+9<5g`eBsEw%$nW@ zG&MH|+!j0$kC?RDHn?n>4t10n6Af#djhSbKZB+_Wuis+btQ-5#KhZAjc+0)SW%W_4 zvXNkcOM695m@Ul$r}?R|VJGS(bqgd>k&jd+kryC{7pl3i8pPw6ti!IMD+t1!(&-Xm zOd<+mc^ZSm?a64g@xr zC_@|Fi&e-*YALHIO%0U-OSuvaG;Byr>EZ(2^np$zG%P zMyE%g?Lf8i*?4L~89F8sfGK_iPB$U5j3%qWN|6`N7CbyUFKGvqIHk_k9+ z$~8o|k>*K_MtnvmyzP{~{`K-kPR7KjxVHQ3i**CTV3~N&I?1We2c?^*3VC?PQHsSG zaZ5!cMx}wnYjr0`*NwP(8Dcc1={#0}g-J6s9`Y%je6R3X<^r8Jy(ERdi{T?trq+!d ztPG0QqLIkWKJzxMaGne5(n({#$?c%R8c6VbmrY8TvZ5h-*om?bXj7(X;d|ImKDKop zh3i45gMd8sl^%(Gjc+2ojBe<)w__`UZPkOuNfPp%&_PNP#>NO!c}TYQF1cC^gjYvrSxMIKc{Gj&&Q_{}qS1~tNt`fZ1cZh5#0HxPR z`~ApfNm;ug71m z9?#GvI6QDPriISPCLa+B`>-)vyc%%HmL28H&j4bGWkCA)kn$67F$!Qjb_~8;`23}0 
z@)~hRj*(DT)Cwverg1#E%#;@{=;*6bT|$K`%ORG{uI?A=h6I8h5pMwH^b7@E2A$8+ z!iT%!MvXPsN$HmZbNA-4LOWd^W)Bu}`KkAAmXa^|Wz~qRKJHKnk|Ij!)`|1pp_Lsb zlMM5~Ak<2;YA-q`EOJ#zP2*(sPHWoP1Znq9mCalFN%Kt{PQEXLh#yExZEUX9slOjO zEWPG9UDvHp=HOWr?*;h;=6uDTTRWAQC z!HQNexT3EIX}HG^k3UEKFjc}3OSu1LF;KWv-WTaH^|hi%7yS#Cox~quN(n@ z35>FEgEAdSX?ZMK8vShY$opvbR|j$$RwcQVxdJrx^rn{( zfWPsH^9`GG@p<>!75I)diCREfFf;^&Fr0s4P27L6rkl4t=r6jgOHx(8lbMHCZ&=Ra zx!(ns>;n?fC|8gaUexq1(j99uk^wJH1y#wi5NHv1dt#tx9}}B~tqd#ru?0CRShoj$ zq&?tDCLS-x`gLAKYBXx#opd?+xX}a|B^U{c-2}*e`*gF6&-x(N0U6EHc9$2g(T#1B z0{z7~%lI4BUMR$d1NVt*%@{V7Xvto0XmIv{6vJDJpb87CB!-0rEEemik?o%z3@9Lz z&76Q#^~w*$K=syJxwL)kh}m**-H@G~xUhIGtH$O{a*I<~yq7!6`||1-1$t9#jP`9M z7hG1mugNcj$wK(1wdz(qNe7PLp?<}ui+xFM#!EXO}IU6p8uKw}# zfcN*tdV*c-UV_1PcSY;|H^-ablDR0K%Gg?a9kNQtfj*$%##T z+z~%v%L}$Fp7rn1%!(IWx5%9%)kyO@kP-%ZEw?X;chTMO#=i({84IJD&EnTMr;#(lf zx^J_ORyL#8H8#_gF}8=peidD#-3_}O+G6T7ykoOL}mAF$$8?ckID z?DoyrE9db}(tJlw^U>X@YXoyPXKOk#<;(5%oaXMSXXqj@2FQ^wUhR_q*F^6=I{L(r zN}J9{s4?`o0OK=tfiqJXFL{wTR!-znWW_d9zcH=Bf|8XcVau!7luLh{ z^Zr!_xp8cd=D?6v${OLQF=*)Yhx4MF!uIRQ04E^@sc+ieREu@W{wT$y&5b`dvG1@6 zM;3|y2Jf8bA5T=Izvso(8U%84g)3?J7x&%ZgryF+%nRa&AD$4gFQKE1>T4%xDYA2Z zBvu{vC}y-RQOJvWxhHzV%P24H(G5z)EBFP@n>{~5rhIkvX&E%~+;w3p!nrbZsAi|? 
zed>I-Pv-5(+36wet>A?NOiN%nHlx&*=NnB7)Ni6*o#xG270ItS5zRKR15Hsml%Sw+Xu;TdLNPZ{>W8_%s+buw8KNY1m5XJ8d8U zGdBu2bUa9bQnJ|(_O+K9)mgK(fHP>Gry5nkcD4BecxS7D$r2v>Ij4gTbw*&IpJo*J zmtB8Z^%Lg&BL4w$h-pyN!CGQhQ11o7VGgs4jwlpAYIywyGxs2E{@%C1j`k)+QM9rQd1N!(AQ=a<6vwz( z&O~C~f9m=R{(hcCHPZePU^BNB8VWm5mXZQ`xu=L`GdxD^hLsD#h{(t2YclX{pvaSp zPFW8S-qmSyF;2K;c)iyE21RuSMe53e_I2XlwnJ?m)g~H;U@{?*j{0LIkz^Cg;s|Eq z$ti)ernKVq7nd9P0B(>>}$Me9~3@5v*fY5Kc zlG`^5y`q}VDiataDXN41A^M7M%CG4{jy3SR!tO@6H=w7$uY(} zl%M7>sOY3XjB4-e$O+7NnQRFF&YlmF&<{JBSnl;(on*sip+pcl?A`LzN;~$qhu(Q>0_9^bOnF5kfaX$C72LGB zQ`SpiV@`B*A@Na3D1$)%Zg*q-!0nK5*4&+xsF0&Cv=9Zw&*AySE)Ts^2sP1)rL8(g zRi9XKYqoPe6vo1lQIQ~EAJ9LGMANq7WhD)Acj{$B*n@PXSKvMD(;IUpR}Gd!ZS3f5 z@u9Ell^Fo*=eLep8vr!yR21IxF)@52-*2X<+fYHskB{P2=&p$0O?qvd?r3htTl5)7 zhA{=LCl29zCm0>66)HdwJBvo50Q+LycV?3flBs$f9F3R@5q#j;D-%${Glv3cyx<|# zhKA_Py%Spl@|{m*%4WPALmP8P!y{|7jSL$mByL}Y9CYPDg}#vyKG=c4H$m=n$nYpeS8HiW8LSce6MzhsdJ4d-3|E`sSd6wu3Fj__DY}oYHb*} z-h37_YB-&+b36<4yLgjxg?{(^IfYMp@Y`#=?}XiXjsG|PgY$U5 zz@1@s-!8f{%zlR-oC@yQXMZN%H)ihKq~DPSe@_1CE&Z8&-;%g9Z+?dr^1bZ;wQ~MU zzpu^ie2d>v1_$atxfp*|xGw_kG~Dl4g~M=H;opkx&(!-|d8ZP7M-#jfJoP>^{+WEA zOz!CPcYFo>mHc1G_0Kx@hvJTUe#Z`cP5)ZN|05y|WmLGLML@uWza-$>WcwF*{{u*V B1w#M; literal 0 HcmV?d00001 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/fake_to_oss_excel.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/fake_to_oss_excel.conf new file mode 100644 index 00000000000..37edbef4e22 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/fake_to_oss_excel.conf @@ -0,0 +1,77 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + OssFile { + path="/test/seatunnel/sink" + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + file_format_type = "excel" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_projection_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_projection_to_assert.conf 
new file mode 100644 index 00000000000..99744c93bca --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_projection_to_assert.conf @@ -0,0 +1,108 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/excel" + result_table_name = "fake" + file_format_type = excel + field_delimiter = ; + read_columns = [c_string, c_boolean] + skip_header_row_number = 1 + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert.conf new file mode 100644 index 00000000000..477ca41b26d --- /dev/null +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert.conf @@ -0,0 +1,134 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/excel" + result_table_name = "fake" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + 
c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert_with_multipletable.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert_with_multipletable.conf new file mode 100644 index 00000000000..19d3f375a35 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_excel_to_assert_with_multipletable.conf @@ -0,0 +1,132 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + tables_configs = [ + { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/excel" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + schema = { + table = "fake01" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + }, + { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/excel" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + schema = { + table = "fake02" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = 
tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + table-names = ["fake01", "fake02"] + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_filter_excel_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_filter_excel_to_assert.conf new file mode 100644 index 00000000000..b66eccceb02 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/excel/oss_filter_excel_to_assert.conf @@ -0,0 +1,135 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/excel_filter" + result_table_name = "fake" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + file_filter_pattern = "e2e_filter.*" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/e2e.json 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/e2e.json new file mode 100644 index 00000000000..aff56314e7c --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/e2e.json @@ -0,0 +1,5 @@ +{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}} \ No newline at end of file diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/e2e.json.lzo b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/e2e.json.lzo new file mode 100644 index 0000000000000000000000000000000000000000..e2d48d8fbc098cbd8b98916d465037891bf53422 GIT binary patch literal 3466 zcmY*cOK%(38Kq=dc8qA8#z~MAAmC6+q{u`1zUSUi(uN`_ks>8h)KITDP$`mnP!#nd zEm4617jOX=aR4o}5K^}F!UcrJMOq*WFS^L0ixeoj>82>U2+%IlG=HFr0zFp&3dAtr zIm4NI@0{;^=Y0NYZaE(sxc2>PFT8NG@Z7sSmqQnSe*C2Nlg1w}E@QR6p-||>=}>53 zF%%m6AW~V`F7HNsTvTQ&a}hsM*x%SFMBGSccWSu}JbjR_E&?x2j8829Zx$9Oi@=+^ z^}_&otFm601y1c|vqj+gdU0(ra?i!e<^BC~C*r@SwdZ+?65>+DIpbPr=>i%8M6|02 zQB+8exm0M+cnsiuyTCzff3vxcMHk}xsc~#lSy|oLY1GQih`&~D9MtTc)@HM_*=$Ap zs38|GZI>I3iyKrLc5&-lM42a;XSlprXLJ4HuJ9<+L}6RX7|z6)us^RgcFI_i#uOvM zQz@?P-Re$zwNZ=sEapj^UTaDO@w8+qPQ23_1D#gw0H>YhGyC(+*>@tR{R8C|o{uoB z8zmyjyw7?=;aX)AXN>q-OPok?BMcSPqrx*frn%ITQ_Vc0r6f#<^KjpG?VwfO-W}FG z*JzZffO?;4IP1j#s-Xbhcb=0Tw44 z+mrZtu|3^M0dMWkW%hw53N@U`jbzp~?xui?wd~Q3tz%*(UM$9!Y#o)9l4<4|1saXV zjZ3LwD41a!wN*@jNL)&xh*X+VR57tK7ZfuycFWbJdS}_5Xk|Q~&!yrs|F2?VCCJPy zWo96#C}s4t|N3-1m%G4NTQd93Y0rT1JjRp|=3iauO@ad}%y~ucI-kH5W{1W{mNSL4}F#UVDBiNZBR`QuDbWU3%x_ zm7ZigNDbfY??LgTEnX^-!Pz1{m@twN&!dtvaX|x#ajK*u9^ud$=R#2a+i>4>YA%RR z7f?J$MX3lF_X+-z$h~{l-g@GMPU6|k*(2C-)%jrxu+Rr%D6@oj|s_%i=LBc0~FEa zsHsr|JwXKyjL)F@yRDa-oQ@yfpY?y`s7JdaPn=#RHDS28jE^K>mOicjv+W4H%miT^ zdVrpde9%Aq#OWhcE6U^}f8^Y`&SS(A3O$Kl*S7Ul?5Wc?&vxszZ6SW+(1))LKXpPc zQ6spfAX1_~sIuJ|!;(sC#yumc@vOMSxpPHFi3y1DIaB9OXl!q~S~|AGG%C4i5VKl2 zoX7z0t#3^Qz>WRFrfrR>Vzs^oe7s%Uw8Xdf@>xstOe&q60&X2ot(IJ@7{P833Qhsi zU=+e|LS5<+=uBU*Mj!$TA0w`@t*nH8|Fro{yLUcb&92|SGyJL3+He2x6Guq#;Lhp( z6^iDD#~7|2)&JDKt|U5$TG>znNgAeIP$INep3q%?^r`c_@+698T?#VeI(F{#m6|h+ z;^GcHcY3THJa`T4hHjv$LJKQL9N0cfMT`;y%E26w0%j%k=_};i>1R=@1IB!gB|>BK zxyGLLt^8qWcNa8FFD%x!fV0WeQWCfrU(Q$>k0+<*rhpIY<1?_YNTv|nZ38cDO%`m| z6ur`1FIXzIFg5gLouE z*{IY!cINa|(i_Li^Kp8Hg`-5Cy*hjbv7vdjVi2uFIN4L7jxmEkVhW8TxfEy&^XP=6 
z!+pZajjAs`W$AEetdm(Q+BRGd##1@auv2Pgta>I}+jGajnaTDc961tn5_wD0_+e0j zB}LNf%~BJ1dSX9s!>w`<)b`viD#(d4XbjH=C4ro*=)oYG6bivz26r)M|L*swr(w15 z->%bpSiYNFJAKdm+z}^#8=VMy;-vYk)vpXXNP3)(ro)axV+(MFe_)@A@z0LF_`09R zINBX$ik>+GCT1W?r3_)xP|y3TKavbzDmLeLie&J~(CvvZG524yl)b1aM`EEvQASOW zU05(i1&YWxl^`8DeD=}pQ|BtBQKACEe0avYpe#R^&e)-1qA}OrwiJ}-4l#X1vP-jD zS>VmZZ8Rp7*4SF#1Fq-CYnI|-a_uk!ygjpBwv^3wj%o|hQzta2P(KeIuUu-N3&L<5 z7g5fjHc!FUY=1%o_5sF_t&GCPPy+pCnWF{gJM7mB;W5d=5rCm>%qrNRn>yy&LW;&R0d5uG2)=L}?5u`5csKFl}*Vb&aQyHFj9Ct^3O|Qs<3VZ{9w4 zt`a;i1FC(Z;B=v}ax#9L#YZ1ZXY!jMVP>wATm%lXb2~N@rgBFc3E-LC+JYTUa`Afm zLXz9*WE?nM$`tH8)QRV_d(lrEB2@Ro&}D6@vFVBV&$YSWp`f1o;4haU74D+T2Fr?D zq3s?P=bA`@t~2y-=&g@@dGhCxFK_jr_WEIOx7E)K0mGQ~MoIWO#!(MxBW=oYWGfM) zI^2WcG~NG3yN*L+3`}9jvBQVNRQ-m_M;<$uf)w2;Ep$y3>)%GF-SDI!AO+zG;Y#d) zVKau@QD_I7y+~7~p1ne>cft$W?se`*9ptRWBu!DU%Uk_|L zq-qoSS>VFl?xYRPrPk5lkKE9uc%mNI8GR|#;L{cHtb24d2q}d->#p&)b(^ZCL)xF>vFU7~{x7j1-ty@%3Wp@esVNzt-Gsmln6| zm_GW*?*R?;v)(sB!OQsHfI(K_s2FWIcEtrG3zcGgLD7i~dW7sFe*j6!0^<2Xeml{9 J@7w=!{STPQV2J<# literal 0 HcmV?d00001 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/fake_to_oss_file_json.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/fake_to_oss_file_json.conf new file mode 100644 index 00000000000..4869398efab --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/fake_to_oss_file_json.conf @@ -0,0 +1,83 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/tmp/seatunnel/json" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "json" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_lzo_to_console.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_lzo_to_console.conf new file mode 100644 index 00000000000..a17fd8679b3 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_lzo_to_console.conf @@ -0,0 +1,143 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + result_table_name = "fake" + path = "/test/seatunnel/read/lzo_json" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "json" + compress_codec = "lzo" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + } +} + +transform { + sql { + source_table_name = "fake" + result_table_name = "sqlresult" + query = "select * from fake where c_string = 'WArEB'" + } +} + +sink { + Assert { + source_table_name = "sqlresult" + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 1 + }, + { + rule_type = MIN_ROW + rule_value = 1 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + equals_to = "WArEB" + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_smallint + field_type = short + field_value 
= [ + { + equals_to = 15920 + } + ] + }, + { + field_name = c_date + field_type = date + field_value = [ + { + equals_to = "2022-04-27" + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert.conf new file mode 100644 index 00000000000..36b61de04bf --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert.conf @@ -0,0 +1,132 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert_with_multipletable.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert_with_multipletable.conf new file mode 100644 index 00000000000..18063b90464 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_json_to_assert_with_multipletable.conf @@ -0,0 +1,128 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + tables_configs = [ + { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + table = "fake01" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + }, + { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + table = "fake02" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + } 
+ ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + table-names = ["fake01", "fake02"] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_to_console.conf new file mode 100644 index 00000000000..c80a194bfbf --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/json/oss_file_to_console.conf @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/tmp/fake_empty" + file_format_type = "json" + # schema is needed for json type + schema { + + } + } +} + +sink { + Console {} +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/orc/e2e.orc b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/orc/e2e.orc new file mode 100644 index 0000000000000000000000000000000000000000..d50f6bb54dd6af1f3bdd521dcb829d989c503c78 GIT binary patch literal 5730 zcmc&%c~nzp7JqrkgU18m5f&d&NW|KD>?x8EvYnoG*h1Js79a_yqfJ7{0$Cs-5ZW4C z+N!11Dz2lB$4-k=M{C{MTF=yR!M0=7sdcICap~sNx=^QDZLQAtK3*7e)$P6OHm#gWb}d}h{r#eiw+_$`e3)P%+zB|w z8y5!(arTO0UTHE|tUal?Qknr2uQbhOtL{F>*Pc6KTG$s(p7wq_=Ipr=2IuIX{xCS_ z9eA<>$3zw)Qi2P&4T`aT)(u)3as}ZjRJd>H2_*~3;K-F zMx#hy;Bf)F#$Kem-)O{v8;1@KAGRq2&)}xu{TmLhWRY*Izi=ptI{5adcMj7{x_gea zMr$WX#pQ3)*F>2kdSV7^0Op7yUtm!;=o(P)9xfP%1pM?Gl-x6yP4yhUcaZvTUw$oy zy!`3vcN3}WM`v!408at*EMbumz8;f>gLKCh&By@PPaze zrqeY+461V1`zz{OiwoQ~Z(GpSZr4>dG}gK6bbha0X|eg-7OPk9RtFuvHch)lZ3z0> zJ5@H-ZVE%%bvB*b)8?!7>a~?+)w=ekPD4#~d7Y`w(@|CB(1D2BTARmi^cnS45Xr1n zB~9gxo|dwjfU7L1bJljM8|s`DDx0OTx}wxx=5w@#LdswTL_bfn+TR>>*^3Hlm4UL5 z-Ww=4m{d)T#%67!UP&i_Vz=~3ix7*lLRSGHPpfm74616S)(N2x#qRX~YGG%n9-GFw zB)FO089cc%ttZGa?wGzy&R;kF?dwWS`2MI4zbelPXO0~Dg3)_1CWw*x8Otqik!Qo{R>hr&;u{P4N(iLz&JijZl~+_?~* zo%PpUi%*;mNASwjzdC%7|7AD=WH(Oqox1gI_q^=~zPPjdXY`xn`_G-exOc(gtt&Pz z(XZODbnlWw?;N_g`egT+W$4_sv)6Zh_IXd=lApKr9%mi8y?^7b7iRtaXwQ;=xlvM8 zQROTOc4!KH>Xx9@V5zSpm3 zUGD4TFU$E91*NO6Epurh))cGUenW@XY&1Ko-6eGw#iTNK6#A5bD!P02fLiNlFDbFrch~~f4xPaoa=Y9nr(0<%@w@9iO=dTQ 
zI-AW=*$^smX*!x43;n9vYFkUKvcz3e=&rOn>mdvlw^+2z`UZ`r-PurHq^>Pfw^W%t zUbow?)3)kuMRl&)hK`boV6k3R9CDYH>)V;};XU>=C`FLk*_)zC(Wmu4v8Rv7eEA)R z=J{@cKuM7=DOzuRwBY5n8967r(_+8cCR8pxbxwSkdcE|An47119zOrYk^av&btV4( zz4N!9pg!*2mbYtLm)*#@*0u4CH^sX@-t(-gaPgVab?YTdpRzgXJFPB#v)uq;)oy61 zG(ltqW4iuVIlm|&Z5n66w2dn_bnYwMc{a#7+BAl@>67=jG(7WxZ~2^#%JZ>K&kWAi ztyizjnYkH3dT#O0E!>TK`-tJG>09R_zdc{Oq3qg8WX*B9c+(pSB5^TM4^7Ixj~KDlM{3iMAKR=@t)-2J`lSN**0YBzTIwR7jbJ>9)}=E}L3 z=07}tU+?Po-{_w+f9dkxALp01iC^sFxb zW`$*$YemO0WssA#LayUKTBQ(FE?y+~`0P*oj>Ny9;BO}Q4+WucC|Ohjlqe;DlFdjC zqsJ&N)W=YKC}WvDPNl z_YmVh3Fe2AED8m77Ku{N0f_?1fqOQyXTv>8a+q@%g+Y4^tPe`YF#1?V;#3T@$H8~7 zNCMu+BIBtkP@h1hLOD=B7MVz?z#ao~Ade&_ZW6O6nK;QX4n`(JJxZo9_EaX%R3`5< zW}n97oz9#a3-d-virHsSjW7=$b03eL{aMUA_{=*5%sU3^%p$X?GWd>c z#&04McOnz-Atufw<{gtLl#AjB_-Hxwi|A+>Sx4;AboYxhns3y2yfFoSr+KR0VfNS^ zW+%}XOAOEy&?gOILo86sCf=8F7*m-;kw4SX_=a&Z;d)S1!&pub z5tac825K~vVSsV~o;RFZ7^&#}97ZUH^m7=Qh|*6$B0xVQO&&KQc`!Ccntm^D_)pUT zWf*MwAYA}W2W079sxa8~C^P|deLV0Ofg;@1^pW_%VACUi1f!7y(DbGU#tV$qQMx`z z2L`)-4-qg@IGg-)25$@y-{8~_ae@;uldHa{g(_`nwS*KgP-e#NGLoWkPPsr&B@Caaco12^IHSnbcaIrY; z^a@02Z1$R=s=?c4X|~a>4K}M2xX@NhC}8vRVt~#Zu<_#Qux^__V0QcHV6EQv>=-Q~u#A-Df2u()rhuTX zV18c}qkwwE77f$~{fC0OVnk3^39ntnx9VS%NVw6AdJJQqKCxvfSA6#eaANWEII=^BI!tq8WH74DT<0ZxAK}O z0v<9VMAH<7E#97XQIx{n?e5V*TiImwo1pC}scx)=)>v0k7=YI0FKcOowy9cQS_rMr zR${4yR@YeX@MLSuJ%BTnm88@$kZTFs79Xj|%x?iNv0mR4U}>OpUO zwY3Hp>+)_f{pssZjEE+3Sqv`wJvtMZn$_OhZgwMv*61xk@XDg*t{!Ns%tmb!wAyZe z6Z-Dg8;a5A@`@r&0kjr#d$Cy>CG&MUq}d90xv|*}CKuPZ%4)z9eO;ggy6f8gdM9)@ z7uzl9Nx8kJ*8;7*#$Ss8FV^~O#W209RZ}gUr_gxRm+; zo-A*Bag`HoEiEX~w1L0FBKnGme)7;Czl{?3+cDxBwZ!(>9qF^*e|5h9_R>ke+qFC{ zqoQeYb3v{CwIw;~Q!AOc^iTKD&BJli%>_rwHyrtUxB9D-iw|sa&b;T$eay0_AFsW* zEjp>+)t|X7u0#Cb(>Ki@PoJ9K{CSsRAC*a${pn0*(zd5)FsG5W(rbW(4rExrviebsO~Mc!M2k7o$Wo+sH{d`i8X-Z8nspL0Kutr4E^f=zAz9W zz?sCbIAR#jqI2kTr{4X)$%65Jv9sglfU_8W&)&X$y9rvviJHF;}oE;Hs{M6)dUl zG~ppBvl?390B0@J6}hz->SguSJ`?A{uiOHZ;QGk*6!oHxqMG3M9sE|o59aR@(R)So 
zpHF_bQ7RCs8L4K2m-4}8>egpBmkbZ(PLlkIK%@-achv*Z-uvpS%9N_7&Z3D~Z^{ z^sSe7y)E1ksfDb%70q8fwMIly`5(r8B$6#3nwxUsF?c-^x$`!5Q%7c|^+5|LK(fF% zA7A)vH>~;iB#VqwU0BiD1u-+!bU4eP^|8U71KB~34mU*CUYcpCk(Dvx7nyaAo>$}a!!S=Gka^&EG3Uxg$k-ghk zsfJcxYxFk5p$j;CUTLSovdmgg4$Rcmt+pcr>52^n(t3HjqrfZuS4ClKi>p&~PnMyn z%3X%*T3YVY=~#bviRtgRzV%AvME**^-!gZ18xp3@SWxGO)~u~4#M57*ZE8jXI~xsF zl4R_fnhJPtQ){%yk)>LV6&>_Cic2xMba@QLkd)-6fU_+sTVACrE5+Q<6euJ+(Op^$ zVUbrF{Cern6!n3Ms(>g)uB&Wq0WV7{EERfqt?2G`Nb42t&8C`G5N>sqXdr=%cUgknIrz?Hp}S!OE1oIe5taGt`R!XGV&qTlPjj4H zytVHS|F$FU(#+iSbN?ZVEM4Wz@yc^Eq;t;nxK`F^^^^aL8d^V~SpU|Go_Ra8Nehqb zW7bYvQAO$Uq;zOPBR?P)7O4EBqZ3ZJTVw7 z-Wh>*w^?l3a&V~DRBXmrnoNdfh;dez*W=U^$6ST2HGrg`XVBNSZrucVgFoo9SsaoG zCS=pO^u~Qd!{9eR;htu0!f7Dai|~6FewX040e+m!v!@SV5z-$oGdPo?=4VsXt8os$I-5;7#}{$k6zoUv3tpAoVD(4~ug{e$0rG?+Hj zC;qxW=9ka?weQbg{i5Ic^TEFN==w$I?v~RFvyIVvL0NH+c z{hV-hmyOhLArzmDn_2a1)!CT9Z&y9O=J(sA2R=3QZ`xscEatOCQOdpFq?j&O?0C|0 zGrBTzwQtURtK3@M8=3jNzG}2ki>|l?#uGWI319>(;{S(BZ=)I2=1YB9@Voq7YhAo6-@n z%HRO0rC@+mc7g#3RxPoN1PdEMx0F;HI3KEzTET#bie*G1L5Bou2>`%yLfDP90(4{P zAne9k0J^a>5Om9e`97AB1yjCYSc176q%f@ux@Ey!4N}2$Ef|)PJPbatX*U!M38q(o z!)z;1^|$B1oI?FF3^ppv~Md;f7}^W%RHZ zjsX=rOLA=rI%2HhWgK1qix+#5@u3tGEncF5Y@arD!{`GfIv z0SCEm4&BT>UI{&ZqT+iG&JB5aY#!e|kPxzVFfjyu>R#TVgGrF+k0ajpkYCfOP+KA$B3IFW`t!Y>IDMoRiv37M^-!|!u?nu0JCz(m;^ ztX{9nVe!D0V&DBvPr&K%v+vO$Tl|g$xxs~|PNm~%VvF5jbAq~rXnG!PNM4SAaU7LRwl(;j zZbygT;%-ZbG4xNLg|UEmkqG*{-SjN-tZ6W+%YSq#vpxRtCDw@z)q|t)9F{6o=8H=jSYGb)Dn#$u_=_xs6ZZ&rol3bQhvUeRw`dxKxa^i zSK$si4kWvmgP=oVm`hMe{$->j9Sc#fFQysgd4hTzuBj2k(4^oAVD~&hdoDuxQbgkM zNqKC}W=H~8Qb{C(vu{G&jX8k(bTXYvC0>DYRvx=3QU-*}9~{7pRsx-D5y3cXA!dAr zEdYc_+YxCn8U4Ny4dLLBLZ^*D9&Y^c0&PpBAAkjd;$Oz;2vQht^o^-tG?r2fsjJZF(3=o&=Mpgb zTq?%(JbnlW8O^ew!07YS!02EN#_)w18Xev%)Nurj?oUIbM--v`8KhRC0=DN?5e$>< zNs2j!t?h(BrxECbG~AwRV?qnYu`d94s># zzV^fbhxZEAyo74jr(=Lm(m}c^g?$X^1*jSl%h)^!ly#CO5hn#K!`8Dx5v3h1m zDUE9zdK01#lh9}2+U}0yH(e4N6j?9}lITAp0(OL1#9C5;Gi*UF#Jh!fr!z1TgYi5y za(GdE0wsVCC<|cH1O)laQrLJKGm#+I&EYq42#m|V1kA2%hDqQxmi&+q;4gX|dJ|f? 
z3xRiKqSsf#fTKfcoJYQb$argD$Q1&SLvKRlK}0@7kUvq4Hkqvlc+gK&0E%}GMuHa* z1P#3jK~E#-`Ybf};9UNEgcgY{EC?=?&yc?k&LzajURfCa9s?SHI7jyiHCeTE!hiywl^GXFpV zI1s!&G5nPZ9SH9gYI-pb9N4}H9T-d;y`RAWQdbd0>~#u)Z)CWIS6xS>V3 zolo66dO0K6!i=C{mOgbagafZ{47tl;I6`ki)MFTqt=VKflXyZ6?Q5>GBfvu<y(fA|TNUPiDkKQ(u1tlU;1Um;(?)Le(bGVBm!`~(`@Zr5eW#6H) zmli|dl`kYKqZ4Jy-z=B`SIuPRCv6sAd#A&nYU%QMQ#)+U4!4EbKYgXjA82!^a#cQZ zTT&&fJQg>;kGe*|K%>*)vUjL*A6yxnLoQV?!a_W#@^rdfs$?D#Zd@=(?ocSk4XfA- zlIo$=4<)PYjz&wT%dg5s(bXWzY3D9l<71OM7YsZ74z~~-E=qSo=Jby?o40EstGIhy zm|d#iz=S?=1!cH2v3erC!TaG|h$42FuXj|)R)OJyF0ZFaHKCK_dKxasc26kD7IERC zT>pf!rv%q<|L~+z%-P|nb;=&WgY`PWVXC zp_*6?6hz}I7|k-I>VxUw+(IhB@qvB>w{X?k_}F*NEx|%94Ei777QQG87ybvhh5rT; zE`5ii9xAi$LKHrv+=UcdD1{5&iCehBCtUE3+!Fk-6E1pZZsDqxaM3$*3tO0k3H~^4 z;lDVIk2{K6gcAQ8ZV?nQ<6CvN+%iU;@X%_CncnS#-0w?ud+iQaD$*z;!P16ebX=vA z;`YUNIef^I@#(4Q8L64^N!CuM%O2mDW4Af%={Xs;EL(b`6{t7cX0v9d=PX*3x!AhM fvM}9|?XYKC?}M*&@B#Fn^pAQSzF?Qb|Ni=abvwAB literal 0 HcmV?d00001 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/fake_to_oss_file_parquet.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/fake_to_oss_file_parquet.conf new file mode 100644 index 00000000000..4cf507dcd22 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/fake_to_oss_file_parquet.conf @@ -0,0 +1,84 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/tmp/seatunnel/parquet" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "parquet" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + compress_codec = "gzip" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_projection_to_assert.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_projection_to_assert.conf new file mode 100644 index 00000000000..1c98be15bee --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_projection_to_assert.conf @@ -0,0 +1,81 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + read_columns = [c_string, c_boolean, c_double] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert.conf new file mode 100644 index 00000000000..fbfcb6a8e83 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert.conf @@ -0,0 +1,98 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert_with_multipletable.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert_with_multipletable.conf new file mode 100644 index 00000000000..e34806e1218 --- /dev/null +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_parquet_to_assert_with_multipletable.conf @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + tables_configs = [ + { + schema = { + table = "fake01" + } + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + }, + { + schema = { + table = "fake02" + } + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + table-names = ["fake01", "fake02"] + } + } +} \ No newline at end of file diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_to_console.conf new file mode 100644 index 00000000000..fb9e84c30d2 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/parquet/oss_file_to_console.conf @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + OssFile { + path = "/tmp/fake_empty" + bucket = "oss://whale-ops" + access_key = "xxxxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxx" + endpoint = "https://oss-accelerate.aliyuncs.com" + file_format_type = "parquet" + } +} + +sink { + Console {} +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e.txt b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e.txt new file mode 100644 index 00000000000..9871cd85eb6 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e.txt @@ -0,0 +1,5 @@ +uDDrwsQQYONTNeUBIOnLAgunvDqLBObroRzdEdvDgRmgaeFyFH5456857591576298739157764687713794636442057612252MTDnafalse3313846190943192276641872220071936002.4798444E389.52375328387482E307vcIGF2023-06-0776258155390368615610.7646252373186602912023-05-08 16:08:51ipToEdierOAbwQfQzObWqiRhjkWYaMKdCbjurhstsWrAVlRyyR2905930362869031292782506910815576701385108050hArFutrue12631169122166306155952414159791708165.949173E372.1775762383875058E307kMlgO2023-05-2027214280267865241887.6424416000104182532023-10-20 03:49:02 +QIpzzZNFkLwARZDSdwdBzkegCdIRVYJnuXgxNXytAJxxaTzmDF16603816781145850255103997497062535321459349811xaTOktrue5327578191749099325840234439082792961.955231E381.5072154481920294E308GDWOu2023-05-0581449039533149712064.4515003874168475032023-07-06 22:34:11sfgxhqvOLzjdTSNcNaWfEnZqvQraSSuMPazCGhPmSrGuxggqGh111449466287130860562118177510004750271267350957FDhTstrue96247293946402921952995131535667203.3240283E384.473485404447698E307YFdwf2023-02-0429456519357128996647.9939318900994572132023-01-12 02:29:58 
+xVJPgVlosBlTYSkmJCqKHMXzbZkNQKInuVMZeYGhsmzUmcLyPx137745493211075991209783701051546835517166168384qcYaifalse8318050110096656524405690917018449922.9617934E371.8901064340036343E307jaKMq2023-05-1275317114043170470995.9654034735914367862023-05-18 08:09:22raGGBnHsNwMZKemkFErUbedNjSllNcKOVUGdTpXcHGSVphHsNE86377304018502081846122308810391870441519757437JCRZStrue1829974183977114228752256792969205767.9090967E371.6286963710372255E308NBHUB2023-05-0732934086493941743464.6503746053883129532023-05-06 04:35:55 +dBgFeTKkCfnxCljyGfNEurEzCVgwpsHgmcOfYXiQHxeeQNjQuq1961913761867016982512369059615238191571813320BTfhbfalse652666522281866957533025299230722.1456136E381.2398422714159417E308YOiwg2023-10-2433001899362876139955.7235198795513055732023-06-23 13:46:46jsvmHLHlXCGFKwuqlTwAjdMckElrmqgBWvOuuKuWxcinFZWSky19959088245502706421265289671411088181469730839vUyULtrue952655754382886132164227350822215681.9033253E381.0966562906060974E308XFeKf2023-09-1731084757529957096723.2394423349193989032023-06-15 17:04:50 +obtYzIHOTKsABVtirEKEMYUYobsYlDJcFbpQUYvGxCcKlnswEG8096984004544201585383739017658796661353001394xchcntrue853141253976762312923177914159380482.8480754E381.055208146200822E308MSkTD2023-11-2420361788179232141281.9718823433892185262023-10-25 11:47:50gdCWZMGESyarjQPopBhDwKnOyDvaUDgQOEDRCmfUAagfnDDPqV8473436731118772451890654127233667151574025969ewJzLtrue6321769209768782446484076920790579202.7134378E381.1883616449174808E308STvOu2023-10-0821793351767634029460.2897683013563753232023-08-12 23:57:38 \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e.txt.lzo b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-oss-e2e/src/test/resources/text/e2e.txt.lzo new file mode 100644 index 0000000000000000000000000000000000000000..27db8e3b59241e2c35b28353f7432f94fdb8b56c GIT binary patch literal 2720 zcmXYzxo>M(9mnrI=Q0e|j5Ktk_lk)GZLG5|78-;UF&SUa$2~eM915jSD{Y*%+7qs=5#Cw(^3ob*oVA2Y>y?p4 
zDIyp?!G+KwtTf|AFC6t|>tQHm$fZ*za>6tXnaBVnCCo97mq?Ozm!-VG_ zKM6kSQ|3;%$uO7EJiLEuKIWsr{cu&IXLq~$D5@W8cQMKw8o!IGcq|_6*lDU zYBMBU7$P}05K#zjjAVurt-ThCE3Uou+~OXF=(JaOE%VY6k>=JW^2I=)#i1_a3|EAfzyBooFK4HMoiLeEKEp(+e*Sbk z(r&ddy(GJ{>uyXNgWVuYj?>{dM+fKE?GCwEY;MPNU%frNC1)4=eUCPe^SBUV2TWqQ z))CH?cA5#T31`xKt30s`R!G=L@TQl_JI=#>uUVUhi5j7np}@u*vcd6r<~=TGr(m&C zaMcOxg-3MKUc*wJM1!X_!(O#g4q1Y~2n=&!F}|s7eh3H5)}iAHca&aB$(6TUFr(5+ zbIq_~3sszgloH`sS>)+Nh>TPjtZzLU?FJ z-Bhl6$0%2vT+L}?Rov{O@pxIBK!oI}${0bN%V!Opg|8^h`qr=RMkI)6m+ZBA!8YrJ?9Q;$Y4ADHkV9WQ5OXm)f^4 zuEw-Idt7J9tl4Qy>HH$MER#~@_IO9T(`vm;&KH|@g?5J>Y=1pJw&t{ddAQo6^wuh^ zd}2OlAc%(r#9JpV(uj~~5JYOt0TE+h2OtqH`<>qX-=2dv4%A4x6XJgv7L4_W$THKY zTv8DZQg|;w4R5$bw9peo1KZ*7St^)pq=H1i#6z!KS%$y+CV0&>wikjc+`=dZ8d>-O z0nj!B4JE|Xo-3J@=({+<26rc52J;zUYt<_jv(ciouI^~1TOQ7%X(7LA(YxWG`Vci{ zvudA~YwcDs8Z@77`?OSSw4b60KA$JfKo7|jz>u^A9d}eExI{=Cf+0PC2oouXQRsw1 z?=N!K-Np~xaBvZwBHT%Dlhq3%zyjdbymSOr03ir`W%xEW;Ra8?FbYq`{Xtu;ld6zv zw$#f0B_Di=o>(FU)I(6=lvik>MD)@I&9DmLmCk{1{y)#b65^pIOl73bXq_5l$Auwn zmZ!N9S?qJO2YM)uu+hybf6USLc=xm>rSZI9qodC4eVvr{!(p9PAL^SW>Tb|@iE#m4 zU?y@ah~`4VU5px#9H`?jg9go!Qm`=F98K43uo~f zRR-1*daf{fggoJ(r@>G7u-?u~(Pe3No1?`-dAEs{v*zv$OM=3c@dyvBC)?Jcb@}l* z_?ag7An2i$bi!f!!ATC_iv%kU;$R$t%E$xs(=V{#>G!iB8DIwA2-8a%ZVSWm=wqDn zr)Z+Y0|6-etKSAcMlZk{nTCE!2?=_GARZ0-_jB-MPPsneHiI~5mU_H<+d0znQmt7g z>$7b8Exljm%Xw1iv^tOUZrz#1_W=gBT;Ag%XTw z;t)H`Z_fy~M#BL3jgy8>Ou}NqeSgwleEatx<#Y(RqtZA6VCZ5%h6pPLg|+_Oo6n?U z${`F+IRHrgA^r?5Ce_0Ufh8LKCMK1}v>77^uQ*SFAixc7Av#J0J#lXb2`#26Mv|1y z3)EM9_byl_!GQ!kqbwu{$AjEWH>wo#jRWni9_m-o<5PAL(_MK{J4Ere*NW-5UdzYP zb#6Xt(X-xow20#PYS|``95=^2pqL*xRSAVr0il5;j!^;aL4J;Fzye|py~uEPalHKF zyWlG$F(d$f9KRre!$Cw5G#G;7qF@$?yzUB?_`Xf(IA z&4(B=Fr1Wconnector-file-local-e2e connector-file-cos-e2e connector-file-sftp-e2e + connector-file-oss-e2e connector-cassandra-e2e connector-neo4j-e2e connector-http-e2e diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java 
b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java index 96a3901f22f..1c043866e5b 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java @@ -41,6 +41,7 @@ import org.apache.seatunnel.common.config.TypesafeConfigUtils; import org.apache.seatunnel.common.constants.CollectionConstants; import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; +import org.apache.seatunnel.common.utils.ExceptionUtils; import org.apache.seatunnel.core.starter.execution.PluginUtil; import org.apache.seatunnel.core.starter.utils.ConfigBuilder; import org.apache.seatunnel.engine.common.config.JobConfig; @@ -275,8 +276,11 @@ private static boolean isFallback( if (e instanceof UnsupportedOperationException && "The Factory has not been implemented and the deprecated Plugin will be used." .equals(e.getMessage())) { + log.warn( + "The Factory has not been implemented and the deprecated Plugin will be used."); return true; } + log.warn(ExceptionUtils.getMessage(e)); } return false; } diff --git a/seatunnel-examples/seatunnel-engine-examples/pom.xml b/seatunnel-examples/seatunnel-engine-examples/pom.xml index 33dcaed895f..b15b7d8b4a5 100644 --- a/seatunnel-examples/seatunnel-engine-examples/pom.xml +++ b/seatunnel-examples/seatunnel-engine-examples/pom.xml @@ -38,12 +38,6 @@ ${project.version} - - org.apache.hadoop - hadoop-client - ${hadoop3.version} - - org.apache.seatunnel