Skip to content

Commit e893dee

Browse files
authored
[Feature][Connector-V2] Support single file mode in file sink (#8518)
1 parent edca75b commit e893dee

File tree

38 files changed

+754
-275
lines changed

38 files changed

+754
-275
lines changed

docs/en/connector-v2/sink/CosFile.md

+32-31
Original file line numberDiff line numberDiff line change
@@ -34,37 +34,38 @@ By default, we use 2PC commit to ensure `exactly-once`
3434

3535
## Options
3636

37-
| Name | Type | Required | Default | Description |
38-
|---------------------------------------|---------|----------|--------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
39-
| path | string | yes | - | |
40-
| tmp_path | string | no | /tmp/seatunnel | The result file will write to a tmp path first and then use `mv` to submit tmp dir to target dir. Need a COS dir. |
41-
| bucket | string | yes | - | |
42-
| secret_id | string | yes | - | |
43-
| secret_key | string | yes | - | |
44-
| region | string | yes | - | |
45-
| custom_filename | boolean | no | false | Whether you need custom the filename |
46-
| file_name_expression | string | no | "${transactionId}" | Only used when custom_filename is true |
47-
| filename_time_format | string | no | "yyyy.MM.dd" | Only used when custom_filename is true |
48-
| file_format_type | string | no | "csv" | |
49-
| field_delimiter | string | no | '\001' | Only used when file_format is text |
50-
| row_delimiter | string | no | "\n" | Only used when file_format is text |
51-
| have_partition | boolean | no | false | Whether you need processing partitions. |
52-
| partition_by | array | no | - | Only used then have_partition is true |
53-
| partition_dir_expression | string | no | "${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/" | Only used then have_partition is true |
54-
| is_partition_field_write_in_file | boolean | no | false | Only used then have_partition is true |
55-
| sink_columns | array | no | | When this parameter is empty, all fields are sink columns |
56-
| is_enable_transaction | boolean | no | true | |
57-
| batch_size | int | no | 1000000 | |
58-
| compress_codec | string | no | none | |
59-
| common-options | object | no | - | |
60-
| max_rows_in_memory | int | no | - | Only used when file_format is excel. |
61-
| sheet_name | string | no | Sheet${Random number} | Only used when file_format is excel. |
62-
| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. |
63-
| xml_row_tag | string | no | RECORD | Only used when file_format is xml. |
64-
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. |
65-
| parquet_avro_write_timestamp_as_int96 | boolean | no | false | Only used when file_format is parquet. |
66-
| parquet_avro_write_fixed_as_int96 | array | no | - | Only used when file_format is parquet. |
67-
| encoding | string | no | "UTF-8" | Only used when file_format_type is json,text,csv,xml. |
37+
| Name | Type | Required | Default | Description |
38+
|---------------------------------------|---------|----------|--------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
39+
| path | string | yes | - | |
40+
| tmp_path | string | no | /tmp/seatunnel | The result file is first written to a tmp path, and then `mv` is used to commit the tmp dir to the target dir. Must be a COS dir. |
41+
| bucket | string | yes | - | |
42+
| secret_id | string | yes | - | |
43+
| secret_key | string | yes | - | |
44+
| region | string | yes | - | |
45+
| custom_filename | boolean | no | false | Whether to customize the filename |
46+
| file_name_expression | string | no | "${transactionId}" | Only used when custom_filename is true |
47+
| filename_time_format | string | no | "yyyy.MM.dd" | Only used when custom_filename is true |
48+
| file_format_type | string | no | "csv" | |
49+
| field_delimiter | string | no | '\001' | Only used when file_format_type is text |
50+
| row_delimiter | string | no | "\n" | Only used when file_format_type is text |
51+
| have_partition | boolean | no | false | Whether to process partitions. |
52+
| partition_by | array | no | - | Only used when have_partition is true |
53+
| partition_dir_expression | string | no | "${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/" | Only used when have_partition is true |
54+
| is_partition_field_write_in_file | boolean | no | false | Only used when have_partition is true |
55+
| sink_columns | array | no | | When this parameter is empty, all fields are sink columns |
56+
| is_enable_transaction | boolean | no | true | |
57+
| batch_size | int | no | 1000000 | |
58+
| compress_codec | string | no | none | |
59+
| common-options | object | no | - | |
60+
| max_rows_in_memory | int | no | - | Only used when file_format_type is excel. |
61+
| sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. |
62+
| xml_root_tag | string | no | RECORDS | Only used when file_format_type is xml. |
63+
| xml_row_tag | string | no | RECORD | Only used when file_format_type is xml. |
64+
| xml_use_attr_format | boolean | no | - | Only used when file_format_type is xml. |
65+
| single_file_mode | boolean | no | false | Each parallelism will only output one file. When this parameter is turned on, batch_size will not take effect. The output file name does not have a file block suffix. |
66+
| parquet_avro_write_timestamp_as_int96 | boolean | no | false | Only used when file_format_type is parquet. |
67+
| parquet_avro_write_fixed_as_int96 | array | no | - | Only used when file_format_type is parquet. |
68+
| encoding | string | no | "UTF-8" | Only used when file_format_type is json,text,csv,xml. |
6869

6970
### path [string]
7071

0 commit comments

Comments
 (0)