Skip to content

Commit 1a8e9b4

Browse files
authored
[Feature][OssFile Connector] Make Oss implement source factory and sink factory (#6062)
1 parent b32df93 commit 1a8e9b4

File tree

58 files changed

+3959
-231
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+3959
-231
lines changed

docs/en/connector-v2/sink/OssFile.md

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,123 @@ sink {
362362
}
363363
```
364364

365+
### Multiple Table
366+
367+
To extract source metadata from upstream, you can use `${database_name}`, `${table_name}` and `${schema_name}` in the path.
368+
369+
```bash
370+
371+
env {
372+
parallelism = 1
373+
spark.app.name = "SeaTunnel"
374+
spark.executor.instances = 2
375+
spark.executor.cores = 1
376+
spark.executor.memory = "1g"
377+
spark.master = local
378+
job.mode = "BATCH"
379+
}
380+
381+
source {
382+
FakeSource {
383+
tables_configs = [
384+
{
385+
schema = {
386+
table = "fake1"
387+
fields {
388+
c_map = "map<string, string>"
389+
c_array = "array<int>"
390+
c_string = string
391+
c_boolean = boolean
392+
c_tinyint = tinyint
393+
c_smallint = smallint
394+
c_int = int
395+
c_bigint = bigint
396+
c_float = float
397+
c_double = double
398+
c_bytes = bytes
399+
c_date = date
400+
c_decimal = "decimal(38, 18)"
401+
c_timestamp = timestamp
402+
c_row = {
403+
c_map = "map<string, string>"
404+
c_array = "array<int>"
405+
c_string = string
406+
c_boolean = boolean
407+
c_tinyint = tinyint
408+
c_smallint = smallint
409+
c_int = int
410+
c_bigint = bigint
411+
c_float = float
412+
c_double = double
413+
c_bytes = bytes
414+
c_date = date
415+
c_decimal = "decimal(38, 18)"
416+
c_timestamp = timestamp
417+
}
418+
}
419+
}
420+
},
421+
{
422+
schema = {
423+
table = "fake2"
424+
fields {
425+
c_map = "map<string, string>"
426+
c_array = "array<int>"
427+
c_string = string
428+
c_boolean = boolean
429+
c_tinyint = tinyint
430+
c_smallint = smallint
431+
c_int = int
432+
c_bigint = bigint
433+
c_float = float
434+
c_double = double
435+
c_bytes = bytes
436+
c_date = date
437+
c_decimal = "decimal(38, 18)"
438+
c_timestamp = timestamp
439+
c_row = {
440+
c_map = "map<string, string>"
441+
c_array = "array<int>"
442+
c_string = string
443+
c_boolean = boolean
444+
c_tinyint = tinyint
445+
c_smallint = smallint
446+
c_int = int
447+
c_bigint = bigint
448+
c_float = float
449+
c_double = double
450+
c_bytes = bytes
451+
c_date = date
452+
c_decimal = "decimal(38, 18)"
453+
c_timestamp = timestamp
454+
}
455+
}
456+
}
457+
}
458+
]
459+
}
460+
}
461+
462+
sink {
463+
OssFile {
464+
bucket = "oss://whale-ops"
465+
access_key = "xxxxxxxxxxxxxxxxxxx"
466+
access_secret = "xxxxxxxxxxxxxxxxxxx"
467+
endpoint = "https://oss-accelerate.aliyuncs.com"
468+
path = "/tmp/fake_empty/text/${table_name}"
469+
row_delimiter = "\n"
470+
partition_dir_expression = "${k0}=${v0}"
471+
is_partition_field_write_in_file = true
472+
file_name_expression = "${transactionId}_${now}"
473+
file_format_type = "text"
474+
filename_time_format = "yyyy.MM.dd"
475+
is_enable_transaction = true
476+
compress_codec = "lzo"
477+
}
478+
}
479+
480+
```
481+
365482
## Changelog
366483

367484
### 2.2.0-beta 2022-09-26

docs/en/connector-v2/source/OssFile.md

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,177 @@ sink {
293293
}
294294
```
295295

296+
### Multiple Table
297+
298+
For file types that do not require a configured schema, e.g. `orc`:
299+
300+
```
301+
env {
302+
parallelism = 1
303+
spark.app.name = "SeaTunnel"
304+
spark.executor.instances = 2
305+
spark.executor.cores = 1
306+
spark.executor.memory = "1g"
307+
spark.master = local
308+
job.mode = "BATCH"
309+
}
310+
311+
source {
312+
OssFile {
313+
tables_configs = [
314+
{
315+
schema = {
316+
table = "fake01"
317+
}
318+
bucket = "oss://whale-ops"
319+
access_key = "xxxxxxxxxxxxxxxxxxx"
320+
access_secret = "xxxxxxxxxxxxxxxxxxx"
321+
endpoint = "https://oss-accelerate.aliyuncs.com"
322+
path = "/test/seatunnel/read/orc"
323+
file_format_type = "orc"
324+
},
325+
{
326+
schema = {
327+
table = "fake02"
328+
}
329+
bucket = "oss://whale-ops"
330+
access_key = "xxxxxxxxxxxxxxxxxxx"
331+
access_secret = "xxxxxxxxxxxxxxxxxxx"
332+
endpoint = "https://oss-accelerate.aliyuncs.com"
333+
path = "/test/seatunnel/read/orc"
334+
file_format_type = "orc"
335+
}
336+
]
337+
result_table_name = "fake"
338+
}
339+
}
340+
341+
sink {
342+
Assert {
343+
rules {
344+
table-names = ["fake01", "fake02"]
345+
}
346+
}
347+
}
348+
```
349+
350+
For file types that require a configured schema, e.g. `json`:
351+
352+
```
353+
354+
env {
355+
execution.parallelism = 1
356+
spark.app.name = "SeaTunnel"
357+
spark.executor.instances = 2
358+
spark.executor.cores = 1
359+
spark.executor.memory = "1g"
360+
spark.master = local
361+
job.mode = "BATCH"
362+
}
363+
364+
source {
365+
OssFile {
366+
tables_configs = [
367+
{
368+
bucket = "oss://whale-ops"
369+
access_key = "xxxxxxxxxxxxxxxxxxx"
370+
access_secret = "xxxxxxxxxxxxxxxxxxx"
371+
endpoint = "https://oss-accelerate.aliyuncs.com"
372+
path = "/test/seatunnel/read/json"
373+
file_format_type = "json"
374+
schema = {
375+
table = "fake01"
376+
fields {
377+
c_map = "map<string, string>"
378+
c_array = "array<int>"
379+
c_string = string
380+
c_boolean = boolean
381+
c_tinyint = tinyint
382+
c_smallint = smallint
383+
c_int = int
384+
c_bigint = bigint
385+
c_float = float
386+
c_double = double
387+
c_bytes = bytes
388+
c_date = date
389+
c_decimal = "decimal(38, 18)"
390+
c_timestamp = timestamp
391+
c_row = {
392+
C_MAP = "map<string, string>"
393+
C_ARRAY = "array<int>"
394+
C_STRING = string
395+
C_BOOLEAN = boolean
396+
C_TINYINT = tinyint
397+
C_SMALLINT = smallint
398+
C_INT = int
399+
C_BIGINT = bigint
400+
C_FLOAT = float
401+
C_DOUBLE = double
402+
C_BYTES = bytes
403+
C_DATE = date
404+
C_DECIMAL = "decimal(38, 18)"
405+
C_TIMESTAMP = timestamp
406+
}
407+
}
408+
}
409+
},
410+
{
411+
bucket = "oss://whale-ops"
412+
access_key = "xxxxxxxxxxxxxxxxxxx"
413+
access_secret = "xxxxxxxxxxxxxxxxxxx"
414+
endpoint = "https://oss-accelerate.aliyuncs.com"
415+
path = "/test/seatunnel/read/json"
416+
file_format_type = "json"
417+
schema = {
418+
table = "fake02"
419+
fields {
420+
c_map = "map<string, string>"
421+
c_array = "array<int>"
422+
c_string = string
423+
c_boolean = boolean
424+
c_tinyint = tinyint
425+
c_smallint = smallint
426+
c_int = int
427+
c_bigint = bigint
428+
c_float = float
429+
c_double = double
430+
c_bytes = bytes
431+
c_date = date
432+
c_decimal = "decimal(38, 18)"
433+
c_timestamp = timestamp
434+
c_row = {
435+
C_MAP = "map<string, string>"
436+
C_ARRAY = "array<int>"
437+
C_STRING = string
438+
C_BOOLEAN = boolean
439+
C_TINYINT = tinyint
440+
C_SMALLINT = smallint
441+
C_INT = int
442+
C_BIGINT = bigint
443+
C_FLOAT = float
444+
C_DOUBLE = double
445+
C_BYTES = bytes
446+
C_DATE = date
447+
C_DECIMAL = "decimal(38, 18)"
448+
C_TIMESTAMP = timestamp
449+
}
450+
}
451+
}
452+
}
453+
]
454+
result_table_name = "fake"
455+
}
456+
}
457+
458+
sink {
459+
Assert {
460+
rules {
461+
table-names = ["fake01", "fake02"]
462+
}
463+
}
464+
}
465+
```
466+
296467
## Changelog
297468

298469
### 2.2.0-beta 2022-09-26

seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,18 @@ public abstract class BaseFileSourceConfig implements Serializable {
5151
private final FileFormat fileFormat;
5252
private final ReadStrategy readStrategy;
5353
private final List<String> filePaths;
54+
private final ReadonlyConfig baseFileSourceConfig;
5455

5556
public abstract HadoopConf getHadoopConfig();
5657

5758
public abstract String getPluginName();
5859

5960
public BaseFileSourceConfig(ReadonlyConfig readonlyConfig) {
61+
this.baseFileSourceConfig = readonlyConfig;
6062
this.fileFormat = readonlyConfig.get(BaseSourceConfigOptions.FILE_FORMAT_TYPE);
6163
this.readStrategy = ReadStrategyFactory.of(readonlyConfig, getHadoopConfig());
6264
this.filePaths = parseFilePaths(readonlyConfig);
65+
6366
this.catalogTable = parseCatalogTable(readonlyConfig);
6467
}
6568

0 commit comments

Comments
 (0)