Skip to content

Commit de1bd9d

Browse files
JackWang032
and
jialan
authored
feat: add benchmark test suite (#273)
* feat: support languages benchmark
* feat: add benchmark sql test case
* feat: remove flinksql benchmark
* fix: fix validate throw exception in benchmark
* feat: support custom params when run benchmark test method

Co-authored-by: jialan <[email protected]>
1 parent 63df067 commit de1bd9d

27 files changed

+2352
-19791
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ src/**/.antlr
88
coverage
99
.idea
1010
gen/
11-
src/**/*.iml
11+
src/**/*.iml
12+
benchmark/reports/*

benchmark/data/flink/create.sql

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
CREATE TABLE MyTable ('user_id' BIGINT, 'name' STRING) WITH ('connector' = 'oracle-x');
2+
3+
CREATE TABLE MyTable WITH ('connector' = 'oracle-x');
4+
5+
CREATE TEMPORARY TABLE client_errors (
6+
log_time TIMESTAMP(3),
7+
request_line STRING,
8+
status_code STRING,
9+
size INT
10+
) WITH (
11+
'connector' = 'stream-x'
12+
);
13+
14+
-- 尽管官方文档的 BNF 里没有支持创建临时表,但实际上是支持的
15+
CREATE TEMPORARY TABLE MyTable ('user_id' BIGINT, 'name' STRING) WITH ('connector' = 'oracle-x');
16+
17+
CREATE TABLE MyTable (
18+
'user_id' BIGINT,
19+
'name' STRING,
20+
'timestamp' BIGINT METADATA, -- part of the query-to-sink schema
21+
'offset' BIGINT METADATA VIRTUAL, -- not part of the query-to-sink schema
22+
'record_time' TIMESTAMP(3) WITH LOCAL TIME ZONE METADATA FROM 'timestamp' -- reads and writes a Kafka record's timestamp
23+
) WITH ('connector' = 'kafka');
24+
25+
CREATE TABLE MyTable (
26+
'user_id' BIGINT,
27+
'price' DOUBLE,
28+
'quantity' DOUBLE,
29+
'cost' AS price * quanitity -- evaluate expression and supply the result to queries
30+
) WITH ('connector' = 'kafka');
31+
32+
CREATE TABLE MyTable (
33+
'user' BIGINT,
34+
product STRING,
35+
order_time TIMESTAMP(3),
36+
WATERMARK FOR order_time AS order_time - INTERVAL '5' SECOND
37+
) WITH ('connector' = 'kafka');
38+
39+
CREATE TABLE MyTable (id INT, PRIMARY KEY (id) NOT ENFORCED) WITH ('connector' = 'kafka');
40+
41+
CREATE TABLE tbl1 (
42+
a BIGINT,
43+
h VARCHAR,
44+
g AS 2 * (a + 1),
45+
ts AS toTimestamp(b, 'yyyy-MM-dd HH:mm:ss'),
46+
b VARCHAR,
47+
proc AS PROCTIME(),
48+
meta STRING METADATA,
49+
my_meta STRING METADATA FROM 'meta',
50+
my_meta STRING METADATA FROM 'meta' VIRTUAL,
51+
meta STRING METADATA VIRTUAL,
52+
PRIMARY KEY (a, b) NOT ENFORCED
53+
) PARTITIONED BY (a, h) WITH (
54+
'connector' = 'kafka',
55+
'kafka.topic' = 'log.test'
56+
);
57+
58+
CREATE TABLE Orders_in_file (
59+
'user' BIGINT,
60+
product STRING,
61+
order_time_string STRING,
62+
order_time AS to_timestamp(order_time)
63+
) PARTITIONED BY ('user') WITH (
64+
'connector' = 'filesystem',
65+
'path' = '...'
66+
);
67+
68+
CREATE TABLE Orders_with_watermark (
69+
id INT,
70+
-- Add watermark definition
71+
WATERMARK FOR order_time AS order_time - INTERVAL '5' SECOND
72+
) WITH (
73+
-- Overwrite the startup-mode
74+
'scan.startup.mode' = 'latest-offset'
75+
) LIKE Orders_in_file (
76+
-- Exclude everything besides the computed columns which we need to generate the watermark for.
77+
-- We do not want to have the partitions or filesystem options as those do not apply to kafka.
78+
EXCLUDING ALL
79+
INCLUDING GENERATED
80+
);
81+
82+
CREATE TABLE my_ctas_table WITH ('connector' = 'kafka')
83+
AS SELECT
84+
id,
85+
name,
86+
age
87+
FROM
88+
source_table
89+
WHERE
90+
mod(id, 10) = 0;
91+
92+
CREATE TABLE catalog1.db1.table1 (id INT) WITH ('connector' = 'kafka');
93+
94+
CREATE TABLE catalog1.db1.table1 (
95+
attr0 STRING,
96+
attr1 BOOLEAN,
97+
attr3 DECIMAL(38, 18),
98+
attr4 TINYINT,
99+
attr5 SMALLINT,
100+
attr6 INT,
101+
attr7 BIGINT,
102+
attr8 FLOAT,
103+
attr9 DOUBLE,
104+
attr10 DATE,
105+
attr11 TIME,
106+
attr12 TIMESTAMP(3),
107+
attr13 ARRAY<STRING>,
108+
attr14 ROW<attr15 FLOAT, attr16 TIMESTAMP(3)>,
109+
attr17 MAP<INT, BIGINT>,
110+
name1 VARCHAR(64),
111+
message ROW<data ROW<UPO_TIMESTAMP VARCHAR(20)>>,
112+
`raw` RAW('class', 'snapshot')
113+
) WITH ('connector' = 'kafka');

benchmark/data/flink/select.sql

+122
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
-- benchmark/data/flink/select.sql
-- Flink SQL SELECT fixtures used by the parser benchmark suite.

-- Window TVF Aggregation
SELECT
    window_start,
    window_end,
    supplier_id,
    SUM(price) AS price
FROM TABLE(
    TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
GROUP BY window_start, window_end, GROUPING SETS ((supplier_id), ());

SELECT
    window_start,
    window_end,
    supplier_id,
    SUM(price) AS price
FROM TABLE(
    TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
GROUP BY window_start, window_end, ROLLUP (supplier_id);

SELECT
    window_start,
    window_end,
    item,
    supplier_id,
    SUM(price) AS price
FROM TABLE(
    TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
GROUP BY window_start, window_end, CUBE (supplier_id, item);

-- GROUPING SETS
SELECT
    window_start,
    window_end,
    supplier_id,
    SUM(price) AS price
FROM TABLE(
    TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
GROUP BY
    window_start,
    window_end,
    GROUPING SETS ((supplier_id), ());

SELECT
    window_start,
    window_end,
    supplier_id,
    SUM(price) AS price
FROM TABLE(
    TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
GROUP BY
    window_start,
    window_end,
    ROLLUP (supplier_id);

SELECT
    window_start,
    window_end,
    item,
    supplier_id,
    SUM(price) AS price
FROM TABLE(
    TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
GROUP BY
    window_start,
    window_end,
    CUBE (supplier_id, item);

-- Group Window Aggregation
SELECT
    `user`,
    TUMBLE_START(order_time, INTERVAL '1' DAY) AS wStart,
    SUM(amount)
FROM Orders
GROUP BY
    TUMBLE(order_time, INTERVAL '1' DAY),
    `user`;

-- fixed: the auxiliary function must match the group window (HOP_START, not
-- TUMBLE_START), and HOP requires both a slide and a size interval
SELECT
    `user`,
    HOP_START(order_time, INTERVAL '1' HOUR, INTERVAL '1' DAY) AS wStart,
    SUM(amount)
FROM Orders
GROUP BY
    HOP(order_time, INTERVAL '1' HOUR, INTERVAL '1' DAY),
    `user`;

-- fixed: SESSION_START (not TUMBLE_START) pairs with the SESSION group window
SELECT
    `user`,
    SESSION_START(order_time, INTERVAL '1' DAY) AS wStart,
    SUM(amount)
FROM Orders
GROUP BY
    SESSION(order_time, INTERVAL '1' DAY),
    `user`;

-- Having
SELECT SUM(amount)
FROM Orders
GROUP BY `users`
HAVING SUM(amount) > 50;

-- Over Aggregation
SELECT order_id, order_time, amount,
    SUM(amount) OVER (
        PARTITION BY product
        ORDER BY order_time
        RANGE BETWEEN INTERVAL '1' HOUR PRECEDING AND CURRENT ROW
    ) AS one_hour_prod_amount_sum
FROM Orders;

SELECT product, order_time, amount,
    SUM(amount) OVER (
        PARTITION BY product
        ORDER BY order_time
        ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
    ) AS one_hour_prod_amount_sum
FROM source_table;

SELECT order_id, order_time, amount,
    SUM(amount) OVER w AS sum_amount,
    AVG(amount) OVER w AS avg_amount
FROM Orders
WINDOW w AS (
    PARTITION BY product
    ORDER BY order_time
    RANGE BETWEEN INTERVAL '1' HOUR PRECEDING AND CURRENT ROW);

benchmark/data/hive/create.sql

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
-- benchmark/data/hive/create.sql
-- HiveQL CREATE TABLE fixtures used by the parser benchmark suite.
-- Covers TEMPORARY/EXTERNAL/IF NOT EXISTS permutations, partitioning,
-- bucketing, skew, storage formats, LIKE copies, CTAS and complex types.

CREATE TEMPORARY TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING);

CREATE TEMPORARY EXTERNAL TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING);

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS list_bucket_multiple (col1 STRING, col2 INT, col3 STRING);

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) COMMENT 'this is a comment';

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) COMMENT 'this is a comment1' PARTITIONED BY (`date` STRING COMMENT 'column_comment');

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) COMMENT 'this is a comment2' PARTITIONED BY (`date` STRING COMMENT 'column_comment') CLUSTERED BY (col1, col2) INTO 32 BUCKETS;

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) COMMENT 'this is a comment3' PARTITIONED BY (`date` STRING COMMENT 'column_comment') CLUSTERED BY (col1, col2) SORTED BY (col1 ASC) INTO 22 BUCKETS;

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) COMMENT 'this is a comment4' PARTITIONED BY (`date` STRING COMMENT 'column_comment') CLUSTERED BY (col1, col2) SORTED BY (col1 ASC) INTO 34 BUCKETS SKEWED BY (col1, col2) ON (('s1', 1), ('s3', 3), ('s13', 13), ('s78', 78)) STORED AS DIRECTORIES;

CREATE TABLE page_view(
    viewTime INT,
    userid BIGINT,
    page_url STRING,
    referrer_url STRING,
    ip STRING COMMENT 'IP Address of the User'
) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE;

CREATE TEMPORARY EXTERNAL TABLE page_view(
    viewTime INT,
    userid BIGINT,
    page_url STRING,
    referrer_url STRING,
    ip STRING COMMENT 'IP Address of the User'
) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS TEXTFILE;

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
    viewTime INT,
    userid BIGINT,
    page_url STRING,
    referrer_url STRING,
    ip STRING COMMENT 'IP Address of the User'
) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS RCFILE;

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
    viewTime INT,
    userid BIGINT,
    page_url STRING,
    referrer_url STRING,
    ip STRING COMMENT 'IP Address of the User'
) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS ORC LOCATION '/hsd_path';

-- CTAS variant: storage clauses followed by a SELECT with SORT BY
CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
    viewTime INT,
    userid BIGINT,
    page_url STRING,
    referrer_url STRING,
    ip STRING COMMENT 'IP Address of the User'
) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS PARQUET LOCATION '/hsd_path' AS
SELECT
    (key % 1024) new_key,
    concat(key, value) key_value_pair
FROM
    key_value_store SORT BY new_key,
    key_value_pair;

-- NOTE(review): skew values are numeric literals while `key` is STRING —
-- presumably only parse-level coverage is intended; verify.
CREATE TABLE list_bucket_single (key STRING, value STRING)
SKEWED BY (key) ON (1,5,6) STORED AS AVRO;

CREATE TRANSACTIONAL TABLE transactional_table_test(key STRING, value STRING) PARTITIONED BY(ds STRING) STORED AS INPUTFORMAT 'inputfilename' OUTPUTFORMAT 'outputfilename';

CREATE TABLE IF NOT EXISTS copy_table LIKE origin_table;

CREATE TEMPORARY TABLE IF NOT EXISTS copy_table LIKE origin_table;

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS copy_table LIKE origin_table;

CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS copy_table LIKE origin_table LOCATION '/hdfs_path';

CREATE TABLE IF NOT EXISTS derived_table AS
SELECT
    *
FROM
    origin_table;

CREATE TABLE `mydb.t1`(
    `id` INT,
    `dept_no` INT,
    `addr` STRING,
    `tel` STRING,
    `hobby` ARRAY < STRING >,
    `add` MAP < STRING,
    STRING >
) PARTITIONED BY(`date` STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY '-' MAP KEYS TERMINATED BY ':';

-- NOTE(review): unlike the table above, `add` is unquoted here even though ADD
-- is a Hive keyword — confirm the parser under test accepts it unquoted.
CREATE EXTERNAL TABLE mydb.ext_table(
    id INT,
    name STRING,
    hobby ARRAY < STRING >,
    add
    MAP < STRING,
    STRING >
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY '-' MAP KEYS TERMINATED BY ':' LOCATION '/user/mydb/ext_table' TBLPROPERTIES('author' = 'hayden', 'desc' = '一个外部测试表');

benchmark/data/hive/params.json

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"create": {
3+
"validate": ["$sql"],
4+
"getAllTokens": ["$sql"],
5+
"getAllEntities": ["$sql", { "lineNumber": 8, "column": 1 }]
6+
},
7+
"select": {
8+
"validate": ["$sql"],
9+
"getAllTokens": ["$sql"],
10+
"getAllEntities": ["$sql", { "lineNumber": 8, "column": 1 }]
11+
}
12+
}

0 commit comments

Comments
 (0)