From 99ed87e49d152ac6b45bbc189a3e05400871c91c Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 5 Dec 2024 20:45:08 -0500 Subject: [PATCH 01/47] Add all tpch queries (from risinglightdb tests) --- optd-sqllogictest/slt/tpch-q1.slt | 22 ++--- optd-sqllogictest/slt/tpch-q10.slt | 56 ++++++++++++ optd-sqllogictest/slt/tpch-q11.slt.disabled | 31 +++++++ optd-sqllogictest/slt/tpch-q12.slt | 34 ++++++++ optd-sqllogictest/slt/tpch-q13.slt | 51 +++++++++++ optd-sqllogictest/slt/tpch-q14.slt | 18 ++++ optd-sqllogictest/slt/tpch-q15.slt | 40 +++++++++ optd-sqllogictest/slt/tpch-q16.slt.disabled | 68 +++++++++++++++ optd-sqllogictest/slt/tpch-q17.slt.disabled | 22 +++++ optd-sqllogictest/slt/tpch-q18.slt.disabled | 41 +++++++++ optd-sqllogictest/slt/tpch-q19.slt | 40 +++++++++ optd-sqllogictest/slt/tpch-q2.slt.disabled | 76 ++++++++-------- optd-sqllogictest/slt/tpch-q20.slt.disabled | 42 +++++++++ optd-sqllogictest/slt/tpch-q21.slt.disabled | 44 ++++++++++ optd-sqllogictest/slt/tpch-q22.slt.disabled | 48 +++++++++++ optd-sqllogictest/slt/tpch-q3.slt | 51 ++++++----- optd-sqllogictest/slt/tpch-q4.slt.disabled | 30 +++++++ optd-sqllogictest/slt/tpch-q5.slt | 37 ++++---- optd-sqllogictest/slt/tpch-q6.slt | 18 ++-- optd-sqllogictest/slt/tpch-q7.slt | 45 ++++++++++ optd-sqllogictest/slt/tpch-q8.slt | 43 +++++++++ optd-sqllogictest/slt/tpch-q9.slt | 96 +++++++++++++++++++++ 22 files changed, 850 insertions(+), 103 deletions(-) create mode 100644 optd-sqllogictest/slt/tpch-q10.slt create mode 100644 optd-sqllogictest/slt/tpch-q11.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q12.slt create mode 100644 optd-sqllogictest/slt/tpch-q13.slt create mode 100644 optd-sqllogictest/slt/tpch-q14.slt create mode 100644 optd-sqllogictest/slt/tpch-q15.slt create mode 100644 optd-sqllogictest/slt/tpch-q16.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q17.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q18.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q19.slt create mode 100644 optd-sqllogictest/slt/tpch-q20.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q21.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q22.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q4.slt.disabled create mode 100644 optd-sqllogictest/slt/tpch-q7.slt create mode 100644 optd-sqllogictest/slt/tpch-q8.slt create mode 100644 optd-sqllogictest/slt/tpch-q9.slt diff --git a/optd-sqllogictest/slt/tpch-q1.slt b/optd-sqllogictest/slt/tpch-q1.slt index 2507c25a..b4649b26 100644 --- a/optd-sqllogictest/slt/tpch-q1.slt +++ b/optd-sqllogictest/slt/tpch-q1.slt @@ -1,7 +1,7 @@ include _tpch_tables.slt.part query -SELECT +select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, @@ -12,16 +12,18 @@ SELECT avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order -FROM +from lineitem -WHERE - l_shipdate <= date '1998-12-01' - interval '90' day -GROUP BY - l_returnflag, l_linestatus -ORDER BY - l_returnflag, l_linestatus; +where + l_shipdate <= date '1998-12-01' - interval '71' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; ---- A F 37474.00 37569624.64 35676192.0970 37101416.222424 25.354533 25419.231826 0.050866 1478 N F 1041.00 1041301.07 999060.8980 1036450.802280 27.394736 27402.659736 0.042894 38 -N O 75168.00 75384955.37 71653166.3034 74498798.133073 25.558653 25632.422771 0.049697 2941 -R F 36511.00 36570841.24 34738472.8758 36169060.112193 25.059025 25100.096938 0.050027 1457 +N O 75823.00 76040604.76 72270477.1588 75140545.284463 25.564059 25637.425745 0.049824 2966 +R F 36511.00 36570841.24 34738472.8758 36169060.112193 25.059025 25100.096938 0.050027 1457 \ No newline at end of file diff --git a/optd-sqllogictest/slt/tpch-q10.slt b/optd-sqllogictest/slt/tpch-q10.slt new file mode 100644 index 00000000..9faf0ea3 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q10.slt @@ -0,0 +1,56 @@ +include _tpch_tables.slt.part + +query +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20; +---- +121 Customer#000000121 282635.1719 6428.32 PERU tv nCR2YKupGN73mQudO 27-411-990-2959 uriously stealthy ideas. carefully final courts use carefully +124 Customer#000000124 222182.5188 1842.49 CHINA aTbyVAW5tCd,v09O 28-183-750-7809 le fluffily even dependencies. quietly s +106 Customer#000000106 190241.3334 3288.42 ARGENTINA xGCOEAUjUNG 11-751-989-4627 lose slyly. ironic accounts along the evenly regular theodolites wake about the special, final gifts. +16 Customer#000000016 161422.0461 4681.03 IRAN cYiaeMLZSMAOQ2 d0W, 20-781-609-3107 kly silent courts. thinly regular theodolites sleep fluffily after +44 Customer#000000044 149364.5652 7315.94 MOZAMBIQUE Oi,dOSPwDu4jo4x,,P85E0dmhZGvNtBwi 26-190-260-5375 r requests around the unusual, bold a +71 Customer#000000071 129481.0245 -611.19 GERMANY TlGalgdXWBmMV,6agLyWYDyIz9MKzcY8gl,w6t1B 17-710-812-5403 g courts across the regular, final pinto beans are blithely pending ac +89 Customer#000000089 121663.1243 1530.76 KENYA dtR, y9JQWUO6FoJExyp8whOU 24-394-451-5404 counts are slyly beyond the slyly final accounts. quickly final ideas wake. r +112 Customer#000000112 111137.7141 2953.35 ROMANIA RcfgG3bO7QeCnfjqJT1 29-233-262-8382 rmanently unusual multipliers. blithely ruthless deposits are furiously along the +62 Customer#000000062 106368.0153 595.61 GERMANY upJK2Dnw13, 17-361-978-7059 kly special dolphins. pinto beans are slyly. quickly regular accounts are furiously a +146 Customer#000000146 103265.9888 3328.68 CANADA GdxkdXG9u7iyI1,,y5tq4ZyrcEy 13-835-723-3223 ffily regular dinos are slyly unusual requests. slyly specia +19 Customer#000000019 99306.0127 8914.71 CHINA uc,3bHIx84H,wdrmLOjVsiqXCq2tr 28-396-526-5053 nag. furiously careful packages are slyly at the accounts. furiously regular in +145 Customer#000000145 99256.9018 9748.93 JORDAN kQjHmt2kcec cy3hfMh969u 23-562-444-8454 ests? express, express instructions use. blithely fina +103 Customer#000000103 97311.7724 2757.45 INDONESIA 8KIsQX4LJ7QMsj6DrtFtXu0nUEdV,8a 19-216-107-2107 furiously pending notornis boost slyly around the blithely ironic ideas? final, even instructions cajole fl +136 Customer#000000136 95855.3980 -842.39 GERMANY QoLsJ0v5C1IQbh,DS1 17-501-210-4726 ackages sleep ironic, final courts. even requests above the blithely bold requests g +53 Customer#000000053 92568.9124 4113.64 MOROCCO HnaxHzTfFTZs8MuCpJyTbZ47Cm4wFOOgib 25-168-852-5363 ar accounts are. even foxes are blithely. fluffily pending deposits boost +49 Customer#000000049 90965.7262 4573.94 IRAN cNgAeX7Fqrdf7HQN9EwjUa4nxT,68L FKAxzl 20-908-631-4424 nusual foxes! fluffily pending packages maintain to the regular +37 Customer#000000037 88065.7458 -917.75 INDIA 7EV4Pwh,3SboctTWt 18-385-235-7162 ilent packages are carefully among the deposits. furiousl +82 Customer#000000082 86998.9644 9468.34 CHINA zhG3EZbap4c992Gj3bK,3Ne,Xn 28-159-442-5305 s wake. bravely regular accounts are furiously. regula +125 Customer#000000125 84808.0680 -234.12 ROMANIA ,wSZXdVR xxIIfm9s8ITyLl3kgjT6UC07GY0Y 29-261-996-3120 x-ray finally after the packages? regular requests c +59 Customer#000000059 84655.5711 3458.60 ARGENTINA zLOCP0wh92OtBihgspOGl4 11-355-584-3112 ously final packages haggle blithely after the express deposits. furiou diff --git a/optd-sqllogictest/slt/tpch-q11.slt.disabled b/optd-sqllogictest/slt/tpch-q11.slt.disabled new file mode 100644 index 00000000..951d3607 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q11.slt.disabled @@ -0,0 +1,31 @@ +include _tpch_tables.slt.part + +query +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' + ) +order by + value desc; +---- diff --git a/optd-sqllogictest/slt/tpch-q12.slt b/optd-sqllogictest/slt/tpch-q12.slt new file mode 100644 index 00000000..16d3c0e4 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q12.slt @@ -0,0 +1,34 @@ +include _tpch_tables.slt.part + +query +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode; +---- +MAIL 5 5 +SHIP 5 10 diff --git a/optd-sqllogictest/slt/tpch-q13.slt b/optd-sqllogictest/slt/tpch-q13.slt new file mode 100644 index 00000000..a03b967f --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q13.slt @@ -0,0 +1,51 @@ +include _tpch_tables.slt.part + +query +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%special%requests%' + group by + c_custkey + ) as c_orders (c_custkey, c_count) +group by + c_count +order by + custdist desc, + c_count desc; +---- +0 50 +16 8 +17 7 +20 6 +13 6 +12 6 +9 6 +23 5 +14 5 +10 5 +21 4 +18 4 +11 4 +8 4 +7 4 +26 3 +22 3 +6 3 +5 3 +4 3 +29 2 +24 2 +19 2 +15 2 +28 1 +25 1 +3 1 \ No newline at end of file diff --git a/optd-sqllogictest/slt/tpch-q14.slt b/optd-sqllogictest/slt/tpch-q14.slt new file mode 100644 index 00000000..8f950952 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q14.slt @@ -0,0 +1,18 @@ +include _tpch_tables.slt.part + +query +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1995-09-01' + and l_shipdate < date '1995-09-01' + interval '1' month; +---- +15.23021261159725 diff --git a/optd-sqllogictest/slt/tpch-q15.slt b/optd-sqllogictest/slt/tpch-q15.slt new file mode 100644 index 00000000..48d8de25 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q15.slt @@ -0,0 +1,40 @@ +include _tpch_tables.slt.part + +statement ok +create view revenue0 (supplier_no, total_revenue) as + select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) + from + lineitem + where + l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '3' month + group by + l_suppkey; + +query +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey; +---- +10 Supplier#000000010 Saygah3gYWMp72i PY 34-852-489-8585 797313.3838 + +statement ok +drop view revenue0; diff --git a/optd-sqllogictest/slt/tpch-q16.slt.disabled b/optd-sqllogictest/slt/tpch-q16.slt.disabled new file mode 100644 index 00000000..f480ca1b --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q16.slt.disabled @@ -0,0 +1,68 @@ +include _tpch_tables.slt.part + +query +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; +---- +Brand#11 PROMO ANODIZED TIN 45 4 +Brand#11 SMALL PLATED COPPER 45 4 +Brand#11 STANDARD POLISHED TIN 45 4 +Brand#13 MEDIUM ANODIZED STEEL 36 4 +Brand#14 SMALL ANODIZED NICKEL 45 4 +Brand#15 LARGE ANODIZED BRASS 45 4 +Brand#21 LARGE BURNISHED COPPER 19 4 +Brand#23 ECONOMY BRUSHED COPPER 9 4 +Brand#25 MEDIUM PLATED BRASS 45 4 +Brand#31 ECONOMY PLATED STEEL 23 4 +Brand#31 PROMO POLISHED TIN 23 4 +Brand#32 MEDIUM BURNISHED BRASS 49 4 +Brand#33 LARGE BRUSHED TIN 36 4 +Brand#33 SMALL BURNISHED NICKEL 3 4 +Brand#34 LARGE PLATED BRASS 45 4 +Brand#34 MEDIUM BRUSHED COPPER 9 4 +Brand#34 SMALL PLATED BRASS 14 4 +Brand#35 STANDARD ANODIZED STEEL 23 4 +Brand#43 PROMO POLISHED BRASS 19 4 +Brand#43 SMALL BRUSHED NICKEL 9 4 +Brand#44 SMALL PLATED COPPER 19 4 +Brand#52 MEDIUM BURNISHED TIN 45 4 +Brand#52 SMALL BURNISHED NICKEL 14 4 +Brand#53 MEDIUM BRUSHED COPPER 3 4 +Brand#55 STANDARD ANODIZED BRASS 36 4 +Brand#55 STANDARD BRUSHED COPPER 3 4 +Brand#13 SMALL BRUSHED NICKEL 19 2 +Brand#25 SMALL BURNISHED COPPER 3 2 +Brand#43 MEDIUM ANODIZED BRASS 14 2 +Brand#53 STANDARD PLATED STEEL 45 2 +Brand#24 MEDIUM PLATED STEEL 19 1 +Brand#51 ECONOMY POLISHED STEEL 49 1 +Brand#53 LARGE BURNISHED NICKEL 23 1 +Brand#54 ECONOMY ANODIZED BRASS 9 1 diff --git a/optd-sqllogictest/slt/tpch-q17.slt.disabled b/optd-sqllogictest/slt/tpch-q17.slt.disabled new file mode 100644 index 00000000..a08de667 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q17.slt.disabled @@ -0,0 +1,22 @@ +include _tpch_tables.slt.part + +query +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#53' -- original: Brand#23 + and p_container = 'MED BOX' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); +---- +863.2285714285714285714285714 diff --git a/optd-sqllogictest/slt/tpch-q18.slt.disabled b/optd-sqllogictest/slt/tpch-q18.slt.disabled new file mode 100644 index 00000000..68194dc7 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q18.slt.disabled @@ -0,0 +1,41 @@ +include _tpch_tables.slt.part + +query +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 250 -- original: 300 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100; +---- +Customer#000000070 70 2567 1998-02-27 263411.29 266.00 +Customer#000000010 10 4421 1997-04-04 258779.02 255.00 +Customer#000000082 82 3460 1995-10-03 245976.74 254.00 +Customer#000000068 68 2208 1995-05-01 245388.06 256.00 diff --git a/optd-sqllogictest/slt/tpch-q19.slt b/optd-sqllogictest/slt/tpch-q19.slt new file mode 100644 index 00000000..563c5041 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q19.slt @@ -0,0 +1,40 @@ +include _tpch_tables.slt.part + +query +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 1 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#33' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); +---- +24521.1300 diff --git a/optd-sqllogictest/slt/tpch-q2.slt.disabled b/optd-sqllogictest/slt/tpch-q2.slt.disabled index 28d36ee8..61c622fe 100644 --- a/optd-sqllogictest/slt/tpch-q2.slt.disabled +++ b/optd-sqllogictest/slt/tpch-q2.slt.disabled @@ -2,49 +2,47 @@ include _tpch_tables.slt.part query select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment from - part, - supplier, - partsupp, - nation, - region + part, + supplier, + partsupp, + nation, + region where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey -and p_size = 4 -and p_type like '%TIN' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AFRICA' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AFRICA' - ) + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 1 + and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AFRICA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AFRICA' + ) order by s_acctbal desc, n_name, s_name, - p_partkey -limit 100; + p_partkey; ---- -4641.08 Supplier#000000004 MOROCCO 100 Manufacturer#3 Bk7ah4CK8SYQTepEmvMkkgMwg 25-843-787-7479 riously even requests above the exp -1365.79 Supplier#000000006 KENYA 185 Manufacturer#4 tQxuVm7s7CnK 24-696-997-4969 final accounts. regular dolphins use against the furiously ironic decoys. +1365.79 Supplier#000000006 KENYA 154 Manufacturer#1 tQxuVm7s7CnK 24-696-997-4969 final accounts. regular dolphins use against the furiously ironic decoys. diff --git a/optd-sqllogictest/slt/tpch-q20.slt.disabled b/optd-sqllogictest/slt/tpch-q20.slt.disabled new file mode 100644 index 00000000..cfce8398 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q20.slt.disabled @@ -0,0 +1,42 @@ +include _tpch_tables.slt.part + +query +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'indian%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'IRAQ' +order by + s_name; +---- +Supplier#000000005 Gcdm2rJRzl5qlTVzc diff --git a/optd-sqllogictest/slt/tpch-q21.slt.disabled b/optd-sqllogictest/slt/tpch-q21.slt.disabled new file mode 100644 index 00000000..908b20cf --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q21.slt.disabled @@ -0,0 +1,44 @@ +include _tpch_tables.slt.part + +query +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit 100; +---- diff --git a/optd-sqllogictest/slt/tpch-q22.slt.disabled b/optd-sqllogictest/slt/tpch-q22.slt.disabled new file mode 100644 index 00000000..cae50bca --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q22.slt.disabled @@ -0,0 +1,48 @@ +include _tpch_tables.slt.part + +query +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; +---- +13 1 5679.84 +17 1 9127.27 +18 2 14647.99 +23 1 9255.67 +29 2 17195.08 +30 1 7638.57 +31 1 9331.13 diff --git a/optd-sqllogictest/slt/tpch-q3.slt b/optd-sqllogictest/slt/tpch-q3.slt index d0f374e1..f315a028 100644 --- a/optd-sqllogictest/slt/tpch-q3.slt +++ b/optd-sqllogictest/slt/tpch-q3.slt @@ -1,36 +1,35 @@ include _tpch_tables.slt.part query -SELECT +select l_orderkey, - SUM(l_extendedprice * (1 - l_discount)) AS revenue, + sum(l_extendedprice * (1 - l_discount)) as revenue, o_orderdate, - o_shippriority -FROM + o_shippriority +from customer, orders, - lineitem -WHERE - c_mktsegment = 'FURNITURE' - AND c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND o_orderdate < DATE '1995-03-29' - AND l_shipdate > DATE '1995-03-29' -GROUP BY + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by l_orderkey, o_orderdate, - o_shippriority -ORDER BY - revenue DESC, - o_orderdate LIMIT 10; + o_shippriority +order by + revenue desc, + o_orderdate +limit 10; ---- -3588 199498.4104 1995-03-19 0 -4327 123939.6659 1995-03-16 0 -5347 118914.1310 1995-02-22 0 -450 111508.1186 1995-03-05 0 -1767 109576.4152 1995-03-14 0 -386 86258.3745 1995-01-25 0 -897 54854.6063 1995-03-20 0 -2982 54150.4719 1995-03-19 0 -3526 51178.7043 1995-03-16 0 -2277 42401.7338 1995-01-02 0 +1637 164224.9253 1995-02-08 0 +5191 49378.3094 1994-12-11 0 +742 43728.0480 1994-12-23 0 +3492 43716.0724 1994-11-24 0 +2883 36666.9612 1995-01-23 0 +998 11785.5486 1994-11-26 0 +3430 4726.6775 1994-12-12 0 +4423 3055.9365 1995-02-17 0 diff --git a/optd-sqllogictest/slt/tpch-q4.slt.disabled b/optd-sqllogictest/slt/tpch-q4.slt.disabled new file mode 100644 index 00000000..dc991161 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q4.slt.disabled @@ -0,0 +1,30 @@ +include _tpch_tables.slt.part + +query +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; +---- +1-URGENT 9 +2-HIGH 7 +3-MEDIUM 9 +4-NOT SPECIFIED 8 +5-LOW 12 diff --git a/optd-sqllogictest/slt/tpch-q5.slt b/optd-sqllogictest/slt/tpch-q5.slt index daedb301..e3c4a661 100644 --- a/optd-sqllogictest/slt/tpch-q5.slt +++ b/optd-sqllogictest/slt/tpch-q5.slt @@ -1,31 +1,30 @@ include _tpch_tables.slt.part query -SELECT - n_name AS nation, - SUM(l_extendedprice * (1 - l_discount)) AS revenue -FROM +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from customer, orders, lineitem, supplier, nation, region -WHERE +where c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND l_suppkey = s_suppkey - AND c_nationkey = s_nationkey - AND s_nationkey = n_nationkey - AND n_regionkey = r_regionkey - AND r_name = 'AFRICA' -- Specified region - AND o_orderdate >= DATE '1993-01-01' - AND o_orderdate < DATE '1994-01-01' -GROUP BY + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AFRICA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by n_name -ORDER BY - revenue DESC; +order by + revenue desc; ---- -MOROCCO 119356.5868 -ETHIOPIA 62766.6740 -KENYA 3014.4444 +MOROCCO 220457.0142 +ETHIOPIA 115183.8546 diff --git a/optd-sqllogictest/slt/tpch-q6.slt b/optd-sqllogictest/slt/tpch-q6.slt index 12398dad..18ff26ef 100644 --- a/optd-sqllogictest/slt/tpch-q6.slt +++ b/optd-sqllogictest/slt/tpch-q6.slt @@ -1,14 +1,14 @@ include _tpch_tables.slt.part query -SELECT - SUM(l_extendedprice * l_discount) AS revenue_loss -FROM +select + sum(l_extendedprice * l_discount) as revenue +from lineitem -WHERE - l_shipdate >= DATE '1997-01-01' - AND l_shipdate < DATE '1998-01-01' - AND l_discount BETWEEN 0.05 AND 0.07 - AND l_quantity < 24; +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between 0.08 - 0.01 and 0.08 + 0.01 + and l_quantity < 24; ---- -94385.9721 +90927.6243 diff --git a/optd-sqllogictest/slt/tpch-q7.slt b/optd-sqllogictest/slt/tpch-q7.slt new file mode 100644 index 00000000..c9a543f8 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q7.slt @@ -0,0 +1,45 @@ +include _tpch_tables.slt.part + +query +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'UNITED STATES' and n2.n_name = 'CHINA') + or (n1.n_name = 'CHINA' and n2.n_name = 'UNITED STATES') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; +---- +UNITED STATES CHINA 1995.0 130212.3261 +UNITED STATES CHINA 1996.0 195468.6891 \ No newline at end of file diff --git a/optd-sqllogictest/slt/tpch-q8.slt b/optd-sqllogictest/slt/tpch-q8.slt new file mode 100644 index 00000000..c7787f02 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q8.slt @@ -0,0 +1,43 @@ +include _tpch_tables.slt.part + +query +select + o_year, + sum(case + when nation = 'IRAQ' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' + ) as all_nations +group by + o_year +order by + o_year; +---- +1995.0 1.00000000 +1996.0 0.32989690 diff --git a/optd-sqllogictest/slt/tpch-q9.slt b/optd-sqllogictest/slt/tpch-q9.slt new file mode 100644 index 00000000..1c995840 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q9.slt @@ -0,0 +1,96 @@ +include _tpch_tables.slt.part + +query +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; +---- +ARGENTINA 1998.0 17779.0697 +ARGENTINA 1997.0 13943.9538 +ARGENTINA 1996.0 7641.4227 +ARGENTINA 1995.0 20892.7525 +ARGENTINA 1994.0 15088.3526 +ARGENTINA 1993.0 17586.3446 +ARGENTINA 1992.0 28732.4615 +ETHIOPIA 1998.0 28217.1600 +ETHIOPIA 1996.0 33970.6500 +ETHIOPIA 1995.0 37720.3500 +ETHIOPIA 1994.0 37251.0100 +ETHIOPIA 1993.0 23782.6100 +IRAN 1997.0 23590.0080 +IRAN 1996.0 7428.2325 +IRAN 1995.0 21000.9965 +IRAN 1994.0 29408.1300 +IRAN 1993.0 49876.4150 +IRAN 1992.0 52064.2400 +IRAQ 1998.0 11619.9604 +IRAQ 1997.0 47910.2460 +IRAQ 1996.0 18459.5675 +IRAQ 1995.0 32782.3701 +IRAQ 1994.0 9041.2317 +IRAQ 1993.0 30687.2625 +IRAQ 1992.0 29098.2557 +KENYA 1998.0 33148.3345 +KENYA 1997.0 54355.0165 +KENYA 1996.0 53607.4854 +KENYA 1995.0 85354.8738 +KENYA 1994.0 102904.2511 +KENYA 1993.0 109310.8084 +KENYA 1992.0 138534.1210 +MOROCCO 1998.0 157058.2328 +MOROCCO 1997.0 88669.9610 +MOROCCO 1996.0 236833.6672 +MOROCCO 1995.0 381575.8668 +MOROCCO 1994.0 243523.4336 +MOROCCO 1993.0 232196.7803 +MOROCCO 1992.0 347434.1452 +PERU 1998.0 101109.0196 +PERU 1997.0 58073.0866 +PERU 1996.0 30360.5218 +PERU 1995.0 138451.7800 +PERU 1994.0 55023.0632 +PERU 1993.0 110409.0863 +PERU 1992.0 70946.1916 +UNITED KINGDOM 1998.0 139685.0440 +UNITED KINGDOM 1997.0 183502.0498 +UNITED KINGDOM 1996.0 374085.2884 +UNITED KINGDOM 1995.0 548356.7984 +UNITED KINGDOM 1994.0 266982.7680 +UNITED KINGDOM 1993.0 717309.4640 +UNITED KINGDOM 1992.0 79540.6016 +UNITED STATES 1998.0 32847.9600 +UNITED STATES 1997.0 30849.5000 +UNITED STATES 1996.0 56125.4600 +UNITED STATES 1995.0 15961.7977 +UNITED STATES 1994.0 31671.2000 +UNITED STATES 1993.0 55057.4690 +UNITED STATES 1992.0 51970.2300 \ No newline at end of file From 5e4f33a8314fbe721ea2948b7a75579dbfe23e20 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 5 Dec 2024 20:50:34 -0500 Subject: [PATCH 02/47] Newline normalization --- optd-sqllogictest/slt/tpch-q1.slt | 2 +- optd-sqllogictest/slt/tpch-q13.slt | 2 +- optd-sqllogictest/slt/tpch-q7.slt | 2 +- optd-sqllogictest/slt/tpch-q9.slt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optd-sqllogictest/slt/tpch-q1.slt b/optd-sqllogictest/slt/tpch-q1.slt index b4649b26..bf5975cd 100644 --- a/optd-sqllogictest/slt/tpch-q1.slt +++ b/optd-sqllogictest/slt/tpch-q1.slt @@ -26,4 +26,4 @@ order by A F 37474.00 37569624.64 35676192.0970 37101416.222424 25.354533 25419.231826 0.050866 1478 N F 1041.00 1041301.07 999060.8980 1036450.802280 27.394736 27402.659736 0.042894 38 N O 75823.00 76040604.76 72270477.1588 75140545.284463 25.564059 25637.425745 0.049824 2966 -R F 36511.00 36570841.24 34738472.8758 36169060.112193 25.059025 25100.096938 0.050027 1457 \ No newline at end of file +R F 36511.00 36570841.24 34738472.8758 36169060.112193 25.059025 25100.096938 0.050027 1457 diff --git a/optd-sqllogictest/slt/tpch-q13.slt b/optd-sqllogictest/slt/tpch-q13.slt index a03b967f..46fcd388 100644 --- a/optd-sqllogictest/slt/tpch-q13.slt +++ b/optd-sqllogictest/slt/tpch-q13.slt @@ -48,4 +48,4 @@ order by 15 2 28 1 25 1 -3 1 \ No newline at end of file +3 1 diff --git a/optd-sqllogictest/slt/tpch-q7.slt b/optd-sqllogictest/slt/tpch-q7.slt index c9a543f8..b061d9de 100644 --- a/optd-sqllogictest/slt/tpch-q7.slt +++ b/optd-sqllogictest/slt/tpch-q7.slt @@ -42,4 +42,4 @@ order by l_year; ---- UNITED STATES CHINA 1995.0 130212.3261 -UNITED STATES CHINA 1996.0 195468.6891 \ No newline at end of file +UNITED STATES CHINA 1996.0 195468.6891 diff --git a/optd-sqllogictest/slt/tpch-q9.slt b/optd-sqllogictest/slt/tpch-q9.slt index 1c995840..fc9d078a 100644 --- a/optd-sqllogictest/slt/tpch-q9.slt +++ b/optd-sqllogictest/slt/tpch-q9.slt @@ -93,4 +93,4 @@ UNITED STATES 1996.0 56125.4600 UNITED STATES 1995.0 15961.7977 UNITED STATES 1994.0 31671.2000 UNITED STATES 1993.0 55057.4690 -UNITED STATES 1992.0 51970.2300 \ No newline at end of file +UNITED STATES 1992.0 51970.2300 From 298ab67a09b3f8808b957855784187c4a4f13e27 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 5 Dec 2024 21:56:21 -0500 Subject: [PATCH 03/47] Fix BinOp schema property issue --- optd-datafusion-repr/src/properties/schema.rs | 6 +++++- .../slt/{tpch-q11.slt.disabled => tpch-q11.slt} | 0 2 files changed, 5 insertions(+), 1 deletion(-) rename optd-sqllogictest/slt/{tpch-q11.slt.disabled => tpch-q11.slt} (100%) diff --git a/optd-datafusion-repr/src/properties/schema.rs b/optd-datafusion-repr/src/properties/schema.rs index 6c1befd5..b348c87c 100644 --- a/optd-datafusion-repr/src/properties/schema.rs +++ b/optd-datafusion-repr/src/properties/schema.rs @@ -119,7 +119,11 @@ impl SchemaPropertyBuilder { DfPredType::LogOp(_) => Schema { fields: vec![Field::placeholder(); children.len()], }, - + DfPredType::BinOp(_) => { + let mut fields = children[0].fields.clone(); + fields.extend(children[1].fields.clone()); + Schema { fields } + } DfPredType::Cast => Schema { fields: children[0] .fields diff --git a/optd-sqllogictest/slt/tpch-q11.slt.disabled b/optd-sqllogictest/slt/tpch-q11.slt similarity index 100% rename from optd-sqllogictest/slt/tpch-q11.slt.disabled rename to optd-sqllogictest/slt/tpch-q11.slt From 6547052c9f472bfa9a2db6c8773827dbc953adeb Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 5 Dec 2024 21:58:55 -0500 Subject: [PATCH 04/47] tpch q11 fix --- optd-datafusion-repr/src/properties/schema.rs | 6 +++++- .../slt/{tpch-q11.slt.disabled => tpch-q11.slt} | 0 2 files changed, 5 insertions(+), 1 deletion(-) rename optd-sqllogictest/slt/{tpch-q11.slt.disabled => tpch-q11.slt} (100%) diff --git a/optd-datafusion-repr/src/properties/schema.rs b/optd-datafusion-repr/src/properties/schema.rs index 6c1befd5..b348c87c 100644 --- a/optd-datafusion-repr/src/properties/schema.rs +++ b/optd-datafusion-repr/src/properties/schema.rs @@ -119,7 +119,11 @@ impl SchemaPropertyBuilder { DfPredType::LogOp(_) => Schema { fields: vec![Field::placeholder(); children.len()], }, - + DfPredType::BinOp(_) => { + let mut fields = children[0].fields.clone(); + fields.extend(children[1].fields.clone()); + Schema { fields } + } DfPredType::Cast => Schema { fields: children[0] .fields diff --git a/optd-sqllogictest/slt/tpch-q11.slt.disabled b/optd-sqllogictest/slt/tpch-q11.slt similarity index 100% rename from optd-sqllogictest/slt/tpch-q11.slt.disabled rename to optd-sqllogictest/slt/tpch-q11.slt From 768982d6e858c547c551808bf33f07cd58c24bb8 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 5 Dec 2024 22:04:35 -0500 Subject: [PATCH 05/47] Update sqlplannertest plans --- .../tests/tpch/tpch-11-15.planner.sql | 71 ++++++++++--------- .../tests/tpch/tpch-16-20.planner.sql | 2 +- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/optd-sqlplannertest/tests/tpch/tpch-11-15.planner.sql b/optd-sqlplannertest/tests/tpch/tpch-11-15.planner.sql index 235fe273..ec31472a 100644 --- a/optd-sqlplannertest/tests/tpch/tpch-11-15.planner.sql +++ b/optd-sqlplannertest/tests/tpch/tpch-11-15.planner.sql @@ -195,45 +195,46 @@ LogicalSort PhysicalSort ├── exprs:SortOrder { order: Desc } │ └── #1 -└── PhysicalNestedLoopJoin - ├── join_type: Inner - ├── cond:Gt - │ ├── Cast { cast_to: Decimal128(38, 15), child: #1 } - │ └── #0 - ├── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── Mul - │ │ ├── #2 - │ │ └── Cast { cast_to: Decimal128(10, 0), child: #1 } - │ ├── groups: [ #0 ] - │ └── PhysicalProjection { exprs: [ #11, #13, #14 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] } - │ │ ├── PhysicalFilter - │ │ │ ├── cond:Eq - │ │ │ │ ├── #1 - │ │ │ │ └── "CHINA" - │ │ │ └── PhysicalScan { table: nation } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: partsupp } - └── PhysicalProjection - ├── exprs:Cast - │ ├── cast_to: Decimal128(38, 15) - │ ├── child:Mul - │ │ ├── Cast { cast_to: Float64, child: #0 } - │ │ └── 0.0001(float) +└── PhysicalProjection { exprs: [ #1, #2 ] } + └── PhysicalNestedLoopJoin + ├── join_type: Inner + ├── cond:Gt + │ ├── Cast { cast_to: Decimal128(38, 15), child: #2 } + │ └── #0 + ├── PhysicalProjection + │ ├── exprs:Cast + │ │ ├── cast_to: Decimal128(38, 15) + │ │ ├── child:Mul + │ │ │ ├── Cast { cast_to: Float64, child: #0 } + │ │ │ └── 0.0001(float) + │ └── PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── Mul + │ │ ├── #1 + │ │ └── Cast { cast_to: Decimal128(10, 0), child: #0 } + │ ├── groups: [] + │ └── PhysicalProjection { exprs: [ #13, #14 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] } + │ │ ├── PhysicalFilter + │ │ │ ├── cond:Eq + │ │ │ │ ├── #1 + │ │ │ │ └── "CHINA" + │ │ │ └── PhysicalScan { table: nation } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalScan { table: partsupp } └── PhysicalAgg ├── aggrs:Agg(Sum) │ └── Mul - │ ├── #1 - │ └── Cast { cast_to: Decimal128(10, 0), child: #0 } - ├── groups: [] - └── PhysicalProjection { exprs: [ #0, #1 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - ├── PhysicalProjection { exprs: [ #1, #2, #4 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalProjection { exprs: [ #1, #2, #3 ] } + │ ├── #2 + │ └── Cast { cast_to: Decimal128(10, 0), child: #1 } + ├── groups: [ #0 ] + └── PhysicalProjection { exprs: [ #0, #1, #2 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } + ├── PhysicalProjection { exprs: [ #0, #2, #3, #5 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #3 ] } │ │ └── PhysicalScan { table: partsupp } │ └── PhysicalProjection { exprs: [ #0, #3 ] } │ └── PhysicalScan { table: supplier } diff --git a/optd-sqlplannertest/tests/tpch/tpch-16-20.planner.sql b/optd-sqlplannertest/tests/tpch/tpch-16-20.planner.sql index c8a8deb8..a02c6b73 100644 --- a/optd-sqlplannertest/tests/tpch/tpch-16-20.planner.sql +++ b/optd-sqlplannertest/tests/tpch/tpch-16-20.planner.sql @@ -180,7 +180,7 @@ PhysicalProjection ├── cond:And │ ├── Eq │ │ ├── #0 - │ │ └── #13 + │ │ └── #26 │ └── Lt │ ├── Cast { cast_to: Decimal128(30, 15), child: #13 } │ └── #25 From 01774d1500f99126d2541df42ec7b03c557f8765 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Thu, 5 Dec 2024 22:06:00 -0500 Subject: [PATCH 06/47] Delete q11 again --- optd-sqllogictest/slt/tpch-q11.slt.disabled | 31 --------------------- 1 file changed, 31 deletions(-) delete mode 100644 optd-sqllogictest/slt/tpch-q11.slt.disabled diff --git a/optd-sqllogictest/slt/tpch-q11.slt.disabled b/optd-sqllogictest/slt/tpch-q11.slt.disabled deleted file mode 100644 index 951d3607..00000000 --- a/optd-sqllogictest/slt/tpch-q11.slt.disabled +++ /dev/null @@ -1,31 +0,0 @@ -include _tpch_tables.slt.part - -query -select - ps_partkey, - sum(ps_supplycost * ps_availqty) as value -from - partsupp, - supplier, - nation -where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'GERMANY' -group by - ps_partkey having - sum(ps_supplycost * ps_availqty) > ( - select - sum(ps_supplycost * ps_availqty) * 0.0001000000 - from - partsupp, - supplier, - nation - where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'GERMANY' - ) -order by - value desc; ----- From 4c2c7fd75bfe666b900bfa79c203bd5563ec091f Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 00:40:07 -0500 Subject: [PATCH 07/47] fix a couple of depjoin agg pushdown bugs --- .../src/rules/subquery/depjoin_pushdown.rs | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 6e60f122..cebba545 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -177,16 +177,18 @@ fn apply_dep_join_past_proj( let cond = join.cond(); let extern_cols = join.extern_cols(); let proj = LogicalProjection::from_plan_node(right.unwrap_plan_node()).unwrap(); + let proj_exprs = proj.exprs(); let right = proj.child(); // TODO: can we have external columns in projection node? I don't think so? // Cross join should always have true cond assert!(cond == ConstantPred::bool(true).into_pred_node()); let left_schema_len = optimizer.get_schema_of(left.clone()).len(); - let right_schema_len = optimizer.get_schema_of(right.clone()).len(); - let right_cols_proj = - (0..right_schema_len).map(|x| ColumnRefPred::new(x + left_schema_len).into_pred_node()); + let right_cols_proj = proj_exprs.to_vec().into_iter().map(|x| { + x.rewrite_column_refs(|col| Some(col + left_schema_len)) + .unwrap() + }); let left_cols_proj = (0..left_schema_len).map(|x| ColumnRefPred::new(x).into_pred_node()); let new_proj_exprs = ListPred::new( @@ -281,7 +283,7 @@ define_rule!( /// talk by Mark Raasveldt. The correlated columns are covered in the original paper. /// /// TODO: the outer join is not implemented yet, so some edge cases won't work. -/// Run SQList tests to catch these, I guess. +/// Run SQLite tests to catch these, I guess. fn apply_dep_join_past_agg( _optimizer: &impl Optimizer, binding: ArcDfPlanNode, @@ -310,15 +312,14 @@ fn apply_dep_join_past_agg( }) .collect::>(); + // We need to group by all correlated columns. + // In our initial distinct step, we installed an agg node that groups by all correlated columns. + // Keeping this in mind, we only need to append a sequential number for each correlated column, + // as these will correspond to the outputs of the agg node. let new_groups = ListPred::new( - groups - .to_vec() - .into_iter() - .map(|x| { - x.rewrite_column_refs(|col| Some(col + correlated_col_indices.len())) - .unwrap() - }) - .chain(correlated_col_indices.iter().map(|x| { + (0..correlated_col_indices.len()) + .map(|x| ColumnRefPred::new(x).into_pred_node()) + .chain(groups.to_vec().into_iter().map(|x| { x.rewrite_column_refs(|col| Some(col + correlated_col_indices.len())) .unwrap() })) From 66a310d78fd1219b28389d43835630f4b5f2da86 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 00:40:25 -0500 Subject: [PATCH 08/47] un-disable tpch-q17 --- optd-sqllogictest/slt/{tpch-q17.slt.disabled => tpch-q17.slt} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename optd-sqllogictest/slt/{tpch-q17.slt.disabled => tpch-q17.slt} (92%) diff --git a/optd-sqllogictest/slt/tpch-q17.slt.disabled b/optd-sqllogictest/slt/tpch-q17.slt similarity index 92% rename from optd-sqllogictest/slt/tpch-q17.slt.disabled rename to optd-sqllogictest/slt/tpch-q17.slt index a08de667..d6e658d0 100644 --- a/optd-sqllogictest/slt/tpch-q17.slt.disabled +++ b/optd-sqllogictest/slt/tpch-q17.slt @@ -19,4 +19,4 @@ where l_partkey = p_partkey ); ---- -863.2285714285714285714285714 +863.2285714285715 From 21d490afff724a9f2a84cb65c173b5c5c37bf188 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 01:49:54 -0500 Subject: [PATCH 09/47] Fix another bug w/ init distinct --- .../src/rules/subquery/depjoin_pushdown.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index cebba545..da2aaea3 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -119,11 +119,13 @@ fn apply_dep_initial_distinct( // (they will have the same index, just shifted over) let join_cond = LogOpPred::new( LogOpType::And, - (0..correlated_col_indices.len()) - .map(|i| { + correlated_col_indices + .iter() + .enumerate() + .map(|(i, x)| { assert!(i + left_schema_size < left_schema_size + right_schema_size); BinOpPred::new( - ColumnRefPred::new(i).into_pred_node(), + ColumnRefPred::new(*x).into_pred_node(), ColumnRefPred::new(i + left_schema_size).into_pred_node(), BinOpType::Eq, ) From f4108b3137785a9546191931493fe7a12089152d Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 01:51:39 -0500 Subject: [PATCH 10/47] Write comment for init distinct fix --- .../src/rules/subquery/depjoin_pushdown.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index da2aaea3..1c087554 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -116,7 +116,12 @@ fn apply_dep_initial_distinct( // Our join condition is going to make sure that all of the correlated columns // in the right side are equal to their equivalent columns in the left side. - // (they will have the same index, just shifted over) + // + // If we have correlated columns [#16, #17], we want our condition to be: + // #16 = #0 AND #17 = #1 + // + // This is because the aggregate we install on the right side will map the + // correlated columns to their respective indices as shown. let join_cond = LogOpPred::new( LogOpType::And, correlated_col_indices From f055605f349f77ff0bfe16cb10dac5bc034d70d2 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 02:00:26 -0500 Subject: [PATCH 11/47] Update sqlplannertest plans --- .../tests/subqueries/subquery_unnesting.planner.sql | 8 ++++---- optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index b6b0bbec..c14291a1 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -44,7 +44,7 @@ LogicalProjection { exprs: [ #0, #1 ] } └── LogicalAgg ├── exprs:Agg(Sum) │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #1 ] + ├── groups: [ #0 ] └── LogicalFilter ├── cond:Eq │ ├── #1 @@ -64,7 +64,7 @@ PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=18005,io=3000}, stat: {ro │ └── PhysicalAgg │ ├── aggrs:Agg(Sum) │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #1 ] + │ ├── groups: [ #0 ] │ ├── cost: {compute=14000,io=2000} │ ├── stat: {row_cnt=1000} │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } @@ -118,7 +118,7 @@ LogicalProjection { exprs: [ #0, #1 ] } └── LogicalAgg ├── exprs:Agg(Sum) │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #1 ] + ├── groups: [ #0 ] └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } └── LogicalFilter ├── cond:And @@ -145,7 +145,7 @@ PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=21005,io=4000}, stat: {ro │ └── PhysicalAgg │ ├── aggrs:Agg(Sum) │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #1 ] + │ ├── groups: [ #0 ] │ ├── cost: {compute=17000,io=3000} │ ├── stat: {row_cnt=1000} │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} } diff --git a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql b/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql index c5edf635..0ee8d3c0 100644 --- a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql +++ b/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql @@ -358,7 +358,7 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) } └── PhysicalAgg ├── aggrs:Agg(Min) │ └── [ #4 ] - ├── groups: [ #1 ] + ├── groups: [ #0 ] └── PhysicalFilter ├── cond:And │ ├── Eq From 0dcce3d8a2aa179b82719ee4955e1e2e2fd7498d Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 17:30:29 -0500 Subject: [PATCH 12/47] Add test for out-of-order extern columns in subquery --- .../slt/unnest-extern-out-of-order.slt | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 optd-sqllogictest/slt/unnest-extern-out-of-order.slt diff --git a/optd-sqllogictest/slt/unnest-extern-out-of-order.slt b/optd-sqllogictest/slt/unnest-extern-out-of-order.slt new file mode 100644 index 00000000..146d72e3 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-extern-out-of-order.slt @@ -0,0 +1,20 @@ +include _tpch_tables.slt.part + +# A query with a correlated subquery that retrieves columns out of order +# i.e. the extern columns are not of the format [#0, #1, ...] +# This query has extern columns [#1] +query +select + l_orderkey, + l_partkey, + l_extendedprice, + ( + select avg(p_size) + from part + where p_partkey = l_partkey + ) as avg_extendedprice +from lineitem +where l_extendedprice > 55000; +---- +1121 200 55010.00 22.0 +4931 200 55010.00 22.0 From b468692cfe4321d40090aac49511cb55a16be057 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 18:42:21 -0500 Subject: [PATCH 13/47] add unnest test w/ nulls from agg --- optd-sqllogictest/slt/unnest-agg-nulls.slt | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 optd-sqllogictest/slt/unnest-agg-nulls.slt diff --git a/optd-sqllogictest/slt/unnest-agg-nulls.slt b/optd-sqllogictest/slt/unnest-agg-nulls.slt new file mode 100644 index 00000000..8229dbf4 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-agg-nulls.slt @@ -0,0 +1,23 @@ +include _basic_tables.slt.part + +# This query has NULL values from the subquery agg. It won't work without the +# outer join fix. +# It also has out-of-order extern column [#1] +query +select + v1, + v2, + ( + select avg(v4) + from t2 + where v4 = v2 + ) as avg_v4 +from t1; +---- +1 100 +2 200 200.0 +2 250 250.0 +3 300 300.0 +3 300 300.0 + +# todo add COUNT(*) From 78c2b5e8430f76bce993f55d5f6fe9afc92b67de Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 18:47:11 -0500 Subject: [PATCH 14/47] Implement outer join agg null fix --- .../src/rules/subquery/depjoin_pushdown.rs | 65 +++++++++++++++++-- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 1c087554..5e59f20c 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -288,11 +288,8 @@ define_rule!( /// deduplicated set). /// For info on why we do the outer join, refer to the Unnesting Arbitrary Queries /// talk by Mark Raasveldt. The correlated columns are covered in the original paper. -/// -/// TODO: the outer join is not implemented yet, so some edge cases won't work. -/// Run SQLite tests to catch these, I guess. fn apply_dep_join_past_agg( - _optimizer: &impl Optimizer, + optimizer: &impl Optimizer, binding: ArcDfPlanNode, ) -> Vec> { let join = DependentJoin::from_plan_node(binding).unwrap(); @@ -305,6 +302,8 @@ fn apply_dep_join_past_agg( let groups = agg.groups(); let right = agg.child(); + let left_schema_size = optimizer.get_schema_of(left.clone()).len(); + // Cross join should always have true cond assert!(cond == ConstantPred::bool(true).into_pred_node()); @@ -345,11 +344,65 @@ fn apply_dep_join_past_agg( ); let new_dep_join = - DependentJoin::new_unchecked(left, right, cond, extern_cols, JoinType::Cross); + DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols, JoinType::Cross); + let new_agg_exprs_size = new_exprs.len(); + let new_agg_groups_size = new_groups.len(); + let new_agg_schema_size = new_agg_groups_size + new_agg_exprs_size; let new_agg = LogicalAgg::new(new_dep_join.into_plan_node(), new_exprs, new_groups); - vec![new_agg.into_plan_node().into()] + // Add left outer join above the agg node, joining the deduplicated set + // with the new agg node. + + // Both sides will have an agg now, so we want to match the correlated + // columns from the left with those from the right + let outer_join_cond = LogOpPred::new( + LogOpType::And, + correlated_col_indices + .iter() + .enumerate() + .map(|(i, x)| { + let x = ColumnRefPred::from_pred_node(x.clone()).unwrap().index(); + assert!(i + left_schema_size < left_schema_size + new_agg_schema_size); + BinOpPred::new( + ColumnRefPred::new(i).into_pred_node(), + // We *prepend* the correlated columns to the groups list, + // so we don't need to take into account the old + // group-by expressions to get the corresponding correlated + // column. + ColumnRefPred::new(left_schema_size + i).into_pred_node(), + BinOpType::Eq, + ) + .into_pred_node() + }) + .collect(), + ); + + let new_outer_join = LogicalJoin::new_unchecked( + left, + new_agg.into_plan_node(), + outer_join_cond.into_pred_node(), + JoinType::LeftOuter, + ); + + // We have to maintain the same schema above outer join as w/o it, but we + // also need to use the groups from the deduplicated left side, and the + // exprs from the new agg node. If we use everything from the new agg, + // we don't maintain nulls as desired. + let outer_join_proj = LogicalProjection::new( + new_outer_join.into_plan_node(), + ListPred::new( + (0..left_schema_size) + .into_iter() + .chain( + left_schema_size + new_agg_groups_size..left_schema_size + new_agg_schema_size, + ) + .map(|x| ColumnRefPred::new(x).into_pred_node()) + .collect(), + ), + ); + + vec![outer_join_proj.into_plan_node().into()] } // Heuristics-only rule. If we don't have references to the external columns on the right side, From 9edf1af2d330abc432a0c026bd7e1e84da9db4d0 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 19:13:30 -0500 Subject: [PATCH 15/47] Count(*) fix --- .../src/rules/subquery/depjoin_pushdown.rs | 41 +++++++++++++++---- optd-sqllogictest/slt/unnest-agg-nulls.slt | 4 +- optd-sqllogictest/slt/unnest-count-star.slt | 22 ++++++++++ 3 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 optd-sqllogictest/slt/unnest-count-star.slt diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 5e59f20c..139ab8c2 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -3,17 +3,16 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. -use optd_core::nodes::{PlanNodeOrGroup, PredNode}; -// TODO: No push past join -// TODO: Sideways information passing?? +use datafusion_expr::{AggregateFunction, BuiltinScalarFunction}; +use optd_core::nodes::{PlanNodeOrGroup, PredNode, Value}; use optd_core::optimizer::Optimizer; use optd_core::rules::{Rule, RuleMatcher}; use crate::plan_nodes::{ - ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, DependentJoin, - DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, JoinType, - ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, - PredExt, RawDependentJoin, + ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, ConstantType, + DependentJoin, DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, + FuncPred, FuncType, JoinType, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, + LogicalJoin, LogicalProjection, PredExt, RawDependentJoin, }; use crate::rules::macros::define_rule; use crate::OptimizerExt; @@ -397,7 +396,33 @@ fn apply_dep_join_past_agg( .chain( left_schema_size + new_agg_groups_size..left_schema_size + new_agg_schema_size, ) - .map(|x| ColumnRefPred::new(x).into_pred_node()) + .map(|x| { + // Count(*) special case: We want all NULLs to be transformed into 0s. + if x >= left_schema_size + new_agg_groups_size { + // If this node corresponds to an agg function, and + // it's a count(*), apply the workaround + let expr = + exprs.to_vec()[x - left_schema_size - new_agg_groups_size].clone(); + if expr.typ == DfPredType::Func(FuncType::Agg(AggregateFunction::Count)) { + let expr_child = expr.child(0).child(0); + + if expr_child.typ == DfPredType::Constant(ConstantType::UInt8) + && expr_child.data == Some(Value::UInt8(1)) + { + return FuncPred::new( + FuncType::Scalar(BuiltinScalarFunction::Coalesce), + ListPred::new(vec![ + ColumnRefPred::new(x).into_pred_node(), + ConstantPred::int64(0).into_pred_node(), + ]), + ) + .into_pred_node(); + } + } + } + + ColumnRefPred::new(x).into_pred_node() + }) .collect(), ), ); diff --git a/optd-sqllogictest/slt/unnest-agg-nulls.slt b/optd-sqllogictest/slt/unnest-agg-nulls.slt index 8229dbf4..32661fc8 100644 --- a/optd-sqllogictest/slt/unnest-agg-nulls.slt +++ b/optd-sqllogictest/slt/unnest-agg-nulls.slt @@ -12,12 +12,10 @@ select from t2 where v4 = v2 ) as avg_v4 -from t1; +from t1 order by v1; ---- 1 100 2 200 200.0 2 250 250.0 3 300 300.0 3 300 300.0 - -# todo add COUNT(*) diff --git a/optd-sqllogictest/slt/unnest-count-star.slt b/optd-sqllogictest/slt/unnest-count-star.slt new file mode 100644 index 00000000..8e73762a --- /dev/null +++ b/optd-sqllogictest/slt/unnest-count-star.slt @@ -0,0 +1,22 @@ +include _basic_tables.slt.part + +# This query uses a count(*) agg function, with nulls. Nulls should be +# transformed from NULL to 0 when they come from count(*). +# It won't work without the outer join fix + a special case on count(*). +# It also has out-of-order extern column [#1] +query +select + v1, + v2, + ( + select count(*) + from t2 + where v4 = v2 + ) as avg_v4 +from t1 order by v1; +---- +1 100 0 +2 200 1 +2 250 1 +3 300 1 +3 300 1 From b0b9e4e4720e77c9a45900f138fd04dbf6352063 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 19:32:57 -0500 Subject: [PATCH 16/47] planner test updates --- .../subqueries/subquery_unnesting.planner.sql | 180 +++++++++++------- .../tests/tpch/tpch-01-05.planner.sql | 96 ++++++---- 2 files changed, 165 insertions(+), 111 deletions(-) diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index c14291a1..73a94d8d 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -41,37 +41,56 @@ LogicalProjection { exprs: [ #0, #1 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - └── LogicalFilter - ├── cond:Eq - │ ├── #1 - │ └── #0 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=18005,io=3000}, stat: {row_cnt=1} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=18002,io=3000}, stat: {row_cnt=1} } - ├── PhysicalFilter - │ ├── cond:Gt - │ │ ├── #1 - │ │ └── 100(i64) - │ ├── cost: {compute=17000,io=2000} - │ ├── stat: {row_cnt=1} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #0 ] - │ ├── cost: {compute=14000,io=2000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── #0 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalScan { table: t2 } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033003,io=4000}, stat: {row_cnt=1} } +└── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=4033000,io=4000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4018000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=14000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries. @@ -115,45 +134,64 @@ LogicalProjection { exprs: [ #0, #1 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #1 - │ │ └── #0 - │ └── Eq - │ ├── #2 - │ └── #3 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalScan { table: t2 } - └── LogicalScan { table: t3 } -PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=21005,io=4000}, stat: {row_cnt=1} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=21002,io=4000}, stat: {row_cnt=1} } - ├── PhysicalFilter - │ ├── cond:Gt - │ │ ├── #1 - │ │ └── 100(i64) - │ ├── cost: {compute=20000,io=3000} - │ ├── stat: {row_cnt=1} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #0 ] - │ ├── cost: {compute=17000,io=3000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - │ │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #1 + │ │ └── #0 + │ └── Eq + │ ├── #2 + │ └── #3 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalScan { table: t2 } + └── LogicalScan { table: t3 } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036003,io=5000}, stat: {row_cnt=1} } +└── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=4036000,io=5000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4033000,io=5000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4021000,io=4000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=17000,io=3000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ diff --git a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql b/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql index 0ee8d3c0..9e4f5fa8 100644 --- a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql +++ b/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql @@ -335,7 +335,7 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) } │ └── SortOrder { order: Asc } │ └── #3 └── PhysicalProjection { exprs: [ #21, #17, #4, #7, #9, #18, #20, #22 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #26, #7 ], right_keys: [ #1, #0 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #26, #7 ], right_keys: [ #2, #0 ] } ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #7, #16 ], right_keys: [ #0, #1 ] } │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #12 ] } │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } @@ -355,45 +355,61 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) } │ │ │ └── PhysicalScan { table: part } │ │ └── PhysicalScan { table: supplier } │ └── PhysicalScan { table: partsupp } - └── PhysicalAgg - ├── aggrs:Agg(Min) - │ └── [ #4 ] - ├── groups: [ #0 ] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #1 - │ ├── Eq - │ │ ├── #6 - │ │ └── #2 - │ ├── Eq - │ │ ├── #9 - │ │ └── #13 - │ ├── Eq - │ │ ├── #15 - │ │ └── #17 - │ └── Eq - │ ├── #18 - │ └── "AFRICA" - └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: region } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: nation } - └── PhysicalScan { table: region } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ │ ├── PhysicalScan { table: part } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: region } + └── PhysicalAgg + ├── aggrs:Agg(Min) + │ └── [ #4 ] + ├── groups: [ #0 ] + └── PhysicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #1 + │ ├── Eq + │ │ ├── #6 + │ │ └── #2 + │ ├── Eq + │ │ ├── #9 + │ │ └── #13 + │ ├── Eq + │ │ ├── #15 + │ │ └── #17 + │ └── Eq + │ ├── #18 + │ └── "AFRICA" + └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ │ ├── PhysicalScan { table: part } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: region } + └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalScan { table: nation } + └── PhysicalScan { table: region } */ -- TPC-H Q3 From 9778e2577fe445b10c511d37f57a8f6c31662d33 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 19:33:05 -0500 Subject: [PATCH 17/47] clippy --- optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 139ab8c2..9aef20ed 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -392,7 +392,6 @@ fn apply_dep_join_past_agg( new_outer_join.into_plan_node(), ListPred::new( (0..left_schema_size) - .into_iter() .chain( left_schema_size + new_agg_groups_size..left_schema_size + new_agg_schema_size, ) From 87c9c09e2a7f814b209162109938f0e1eccda952 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 19:34:54 -0500 Subject: [PATCH 18/47] Unused variable --- optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 9aef20ed..dd0127b9 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -361,7 +361,6 @@ fn apply_dep_join_past_agg( .iter() .enumerate() .map(|(i, x)| { - let x = ColumnRefPred::from_pred_node(x.clone()).unwrap().index(); assert!(i + left_schema_size < left_schema_size + new_agg_schema_size); BinOpPred::new( ColumnRefPred::new(i).into_pred_node(), From 59ee25b3fcbc5d162b684957f0105193342fb047 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 21:14:57 -0500 Subject: [PATCH 19/47] Initial correlated EXISTS support --- optd-datafusion-bridge/src/into_optd.rs | 46 +++++++++++++++---- optd-datafusion-repr/src/rules/macros.rs | 41 ++++++++++++----- .../project_filter_transpose.rs | 5 +- .../src/rules/subquery/depjoin_pushdown.rs | 16 +++---- optd-sqllogictest/slt/unnest-exists.slt | 18 ++++++++ 5 files changed, 96 insertions(+), 30 deletions(-) create mode 100644 optd-sqllogictest/slt/unnest-exists.slt diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 238a7a00..a6913110 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -20,19 +20,33 @@ use optd_datafusion_repr::properties::schema::Schema as OptdSchema; use crate::OptdPlanContext; +#[derive(Debug, Clone, Copy)] +enum SubqueryType { + Scalar, + Exists, +} + impl OptdPlanContext<'_> { fn subqueries_to_dependent_joins( &mut self, - subqueries: &[&Subquery], + subqueries: &[(&Subquery, SubqueryType)], input: ArcDfPlanNode, input_schema: &DFSchema, ) -> Result { let mut node = input; - for Subquery { - subquery, - outer_ref_columns, - } in subqueries.iter() + for ( + Subquery { + subquery, + outer_ref_columns, + }, + sq_typ, + ) in subqueries.iter() { + let dep_join_type = match sq_typ { + SubqueryType::Scalar => JoinType::Inner, + SubqueryType::Exists => JoinType::LeftSemi, + }; + let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; let dep_join = RawDependentJoin::new( node, @@ -55,7 +69,7 @@ impl OptdPlanContext<'_> { }) .collect(), ), - JoinType::Cross, + dep_join_type, ); node = dep_join.into_plan_node(); } @@ -91,7 +105,7 @@ impl OptdPlanContext<'_> { expr: &'a logical_expr::Expr, context: &DFSchema, dep_ctx: Option<&DFSchema>, - subqueries: &mut Vec<&'a Subquery>, + subqueries: &mut Vec<(&'a Subquery, SubqueryType)>, ) -> Result { use logical_expr::Expr; match expr { @@ -276,9 +290,23 @@ impl OptdPlanContext<'_> { // This relies on a left-deep tree of dependent joins being // generated below this node, in response to all pushed subqueries. let new_column_ref_idx = context.fields().len() + subqueries.len(); - subqueries.push(sq); + subqueries.push((sq, SubqueryType::Scalar)); Ok(ColumnRefPred::new(new_column_ref_idx).into_pred_node()) } + Expr::Exists(ex) => { + // We could use mark join here, if we had one... + let sq = &ex.subquery; + assert!(!ex.negated, "unimplemented"); // Use anti join + + let new_column_ref_idx = context.fields().len() + subqueries.len(); + subqueries.push((sq, SubqueryType::Exists)); + Ok(BinOpPred::new( + ColumnRefPred::new(new_column_ref_idx).into_pred_node(), + ConstantPred::int64(0).into_pred_node(), + BinOpType::Gt, + ) + .into_pred_node()) + } _ => bail!("Unsupported expression: {:?}", expr), } } @@ -324,7 +352,7 @@ impl OptdPlanContext<'_> { exprs: &'a [logical_expr::Expr], context: &DFSchema, dep_ctx: Option<&DFSchema>, - subqueries: &mut Vec<&'a Subquery>, + subqueries: &mut Vec<(&'a Subquery, SubqueryType)>, ) -> Result { let exprs = exprs .iter() diff --git a/optd-datafusion-repr/src/rules/macros.rs b/optd-datafusion-repr/src/rules/macros.rs index 47703e66..420e2963 100644 --- a/optd-datafusion-repr/src/rules/macros.rs +++ b/optd-datafusion-repr/src/rules/macros.rs @@ -4,21 +4,30 @@ // https://opensource.org/licenses/MIT. macro_rules! define_matcher { - ( ( $typ:expr $(, $children:tt )* ) ) => { - RuleMatcher::MatchNode { - typ: $typ, - children: vec![ - $( crate::rules::macros::define_matcher!($children) ),* - ], + ( $discriminant:expr, ( $typ:expr $(, $children:tt )* ) ) => { + if $discriminant { + RuleMatcher::MatchDiscriminant { + typ_discriminant: std::mem::discriminant(&$typ), + children: vec![ + $( crate::rules::macros::define_matcher!($discriminant, $children) ),* + ], + } + } else { + RuleMatcher::MatchNode { + typ: $typ, + children: vec![ + $( crate::rules::macros::define_matcher!($discriminant, $children) ),* + ], + } } }; - ( $pick_one:tt ) => { + ( $discriminant:expr, $pick_one:tt ) => { RuleMatcher::Any }; } macro_rules! define_rule_inner { - ($rule_type:expr, $name:ident, $apply:ident, $($matcher:tt)+) => { + ($rule_type:expr, $discriminant:expr, $name:ident, $apply:ident, $($matcher:tt)+) => { pub struct $name { matcher: RuleMatcher, } @@ -27,7 +36,7 @@ macro_rules! define_rule_inner { pub fn new() -> Self { #[allow(unused_imports)] use DfNodeType::*; - let matcher = crate::rules::macros::define_matcher!($($matcher)+); + let matcher = crate::rules::macros::define_matcher! { $discriminant, $($matcher)+ }; Self { matcher } } } @@ -60,14 +69,22 @@ macro_rules! define_rule_inner { macro_rules! define_rule { ($name:ident, $apply:ident, $($matcher:tt)+) => { - crate::rules::macros::define_rule_inner! { false, $name, $apply, $($matcher)+ } + crate::rules::macros::define_rule_inner! { false, false, $name, $apply, $($matcher)+ } + }; +} + +macro_rules! define_rule_discriminant { + ($name:ident, $apply:ident, $($matcher:tt)+) => { + crate::rules::macros::define_rule_inner! { false, true, $name, $apply, $($matcher)+ } }; } macro_rules! define_impl_rule { ($name:ident, $apply:ident, $($matcher:tt)+) => { - crate::rules::macros::define_rule_inner! { true, $name, $apply, $($matcher)+ } + crate::rules::macros::define_rule_inner! { true, false, $name, $apply, $($matcher)+ } }; } -pub(crate) use {define_impl_rule, define_matcher, define_rule, define_rule_inner}; +pub(crate) use { + define_impl_rule, define_matcher, define_rule, define_rule_discriminant, define_rule_inner, +}; diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs index a31da316..9485dde3 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -100,7 +100,10 @@ fn apply_filter_project_transpose( let exprs = proj.exprs(); let cond = filter.cond(); - let proj_col_map = ProjectionMapping::build(&exprs).unwrap(); + let Some(proj_col_map) = ProjectionMapping::build(&exprs) else { + return vec![]; + }; + let rewritten_cond = proj_col_map.rewrite_filter_cond(cond, false); let new_filter_node = LogicalFilter::new_unchecked(child, rewritten_cond); diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 1c087554..8adda72f 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -15,7 +15,7 @@ use crate::plan_nodes::{ ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, PredExt, RawDependentJoin, }; -use crate::rules::macros::define_rule; +use crate::rules::macros::define_rule_discriminant; use crate::OptimizerExt; /// Like rewrite_column_refs, except it translates ExternColumnRefs into ColumnRefs @@ -49,7 +49,7 @@ fn rewrite_extern_column_refs( ) } -define_rule!( +define_rule_discriminant!( DepInitialDistinct, apply_dep_initial_distinct, (RawDepJoin(JoinType::Cross), left, right) @@ -87,7 +87,7 @@ fn apply_dep_initial_distinct( left, right, ConstantPred::bool(true).into_pred_node(), - JoinType::Cross, + join.join_type(), ); return vec![new_join.into_plan_node().into()]; @@ -111,7 +111,7 @@ fn apply_dep_initial_distinct( right, cond, extern_cols, - JoinType::Cross, + join.join_type(), ); // Our join condition is going to make sure that all of the correlated columns @@ -165,7 +165,7 @@ fn apply_dep_initial_distinct( vec![new_proj.into_plan_node().into()] } -define_rule!( +define_rule_discriminant!( DepJoinPastProj, apply_dep_join_past_proj, (DepJoin(JoinType::Cross), left, (Projection, right)) @@ -212,7 +212,7 @@ fn apply_dep_join_past_proj( vec![new_proj.into_plan_node().into()] } -define_rule!( +define_rule_discriminant!( DepJoinPastFilter, apply_dep_join_past_filter, (DepJoin(JoinType::Cross), left, (Filter, right)) @@ -276,7 +276,7 @@ fn apply_dep_join_past_filter( vec![new_filter.into_plan_node().into()] } -define_rule!( +define_rule_discriminant!( DepJoinPastAgg, apply_dep_join_past_agg, (DepJoin(JoinType::Cross), left, (Agg, right)) @@ -354,7 +354,7 @@ fn apply_dep_join_past_agg( // Heuristics-only rule. If we don't have references to the external columns on the right side, // we can rewrite the dependent join into a normal join. -define_rule!( +define_rule_discriminant!( DepJoinEliminate, apply_dep_join_eliminate_at_scan, // TODO matching is all wrong (DepJoin(JoinType::Cross), left, right) diff --git a/optd-sqllogictest/slt/unnest-exists.slt b/optd-sqllogictest/slt/unnest-exists.slt new file mode 100644 index 00000000..8cefa55b --- /dev/null +++ b/optd-sqllogictest/slt/unnest-exists.slt @@ -0,0 +1,18 @@ +include _tpch_tables.slt.part + +query +SELECT + c_custkey, + c_name +FROM + customer c +WHERE + EXISTS ( + SELECT 1 + FROM orders o + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'O' + AND o.o_orderdate > '1998-08-01' + ); +---- +88 Customer#000000088 From 712f3200058a571f15c5b57eca31bd5549eb003a Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Fri, 6 Dec 2024 22:27:34 -0500 Subject: [PATCH 20/47] Avoid self join issue in adv cost model --- optd-datafusion-repr-adv-cost/src/adv_stats/join.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs index 5aa8fb6c..b6e30e5d 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs @@ -359,7 +359,11 @@ impl< &self, base_col_refs: HashSet, ) -> f64 { - assert!(base_col_refs.len() > 1); + // Hack to avoid issue w/ self joins...unsure if this is a good idea + if base_col_refs.len() <= 1 { + return 1.0; + } + let num_base_col_refs = base_col_refs.len(); base_col_refs .into_iter() From 5e283ceb9a76a2f7b0a3fb1a7dd97fc097b7a825 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 00:04:22 -0500 Subject: [PATCH 21/47] Q4 working --- optd-datafusion-bridge/src/from_optd.rs | 10 +++- optd-datafusion-bridge/src/into_optd.rs | 4 +- .../src/adv_stats/join.rs | 1 + optd-datafusion-repr/src/lib.rs | 2 +- .../src/rules/subquery/depjoin_pushdown.rs | 54 ++++++++++++------- 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/optd-datafusion-bridge/src/from_optd.rs b/optd-datafusion-bridge/src/from_optd.rs index 5346a021..7fbc607b 100644 --- a/optd-datafusion-bridge/src/from_optd.rs +++ b/optd-datafusion-bridge/src/from_optd.rs @@ -450,8 +450,14 @@ impl OptdPlanContext<'_> { } let join_type = match node.join_type() { - JoinType::Inner => datafusion::logical_expr::JoinType::Inner, - JoinType::LeftOuter => datafusion::logical_expr::JoinType::Left, + JoinType::Inner => datafusion_expr::JoinType::Inner, + JoinType::FullOuter => datafusion_expr::JoinType::Full, + JoinType::LeftOuter => datafusion_expr::JoinType::Left, + JoinType::RightOuter => datafusion_expr::JoinType::Right, + JoinType::LeftSemi => datafusion_expr::JoinType::LeftSemi, + JoinType::RightSemi => datafusion_expr::JoinType::RightSemi, + JoinType::LeftAnti => datafusion_expr::JoinType::LeftAnti, + JoinType::RightAnti => datafusion_expr::JoinType::RightAnti, _ => unimplemented!(), }; diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index a6913110..a0f65387 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -302,8 +302,8 @@ impl OptdPlanContext<'_> { subqueries.push((sq, SubqueryType::Exists)); Ok(BinOpPred::new( ColumnRefPred::new(new_column_ref_idx).into_pred_node(), - ConstantPred::int64(0).into_pred_node(), - BinOpType::Gt, + ConstantPred::bool(true).into_pred_node(), + BinOpType::Eq, ) .into_pred_node()) } diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs index b6e30e5d..8a23bf02 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs @@ -191,6 +191,7 @@ impl< JoinType::Inner => inner_join_selectivity, JoinType::LeftOuter => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), JoinType::RightOuter => f64::max(inner_join_selectivity, 1.0 / left_row_cnt), + JoinType::LeftSemi => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), JoinType::Cross => { assert!( on_col_ref_pairs.is_empty(), diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 5a491d1d..f6848888 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -20,7 +20,7 @@ pub use optd_core::nodes::Value; use optd_core::optimizer::Optimizer; use optd_core::rules::Rule; pub use optimizer_ext::OptimizerExt; -use plan_nodes::{ArcDfPlanNode, DfNodeType}; +use plan_nodes::{ArcDfPlanNode, DfNodeType, DfReprPlanNode}; use properties::column_ref::ColumnRefPropertyBuilder; use properties::schema::{Catalog, SchemaPropertyBuilder}; diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 3ca29a83..01c8c1c9 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -12,7 +12,7 @@ use crate::plan_nodes::{ ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, ConstantType, DependentJoin, DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, FuncPred, FuncType, JoinType, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, - LogicalJoin, LogicalProjection, PredExt, RawDependentJoin, + LogicalJoin, LogicalLimit, LogicalProjection, PredExt, RawDependentJoin, }; use crate::rules::macros::define_rule_discriminant; use crate::OptimizerExt; @@ -86,7 +86,7 @@ fn apply_dep_initial_distinct( left, right, ConstantPred::bool(true).into_pred_node(), - join.join_type(), + JoinType::Cross, ); return vec![new_join.into_plan_node().into()]; @@ -142,26 +142,40 @@ fn apply_dep_initial_distinct( left, new_dep_join.into_plan_node(), join_cond.into_pred_node(), - JoinType::Inner, + join.join_type(), ); // Ensure that the schema above the new_join is the same as it was before // for correctness (Project the left side of the new join, // plus the *right side of the right side*) - let new_proj = LogicalProjection::new( - new_join.into_plan_node(), - ListPred::new( - (0..left_schema_size) - .chain( - (left_schema_size + correlated_col_indices.len()) - ..(left_schema_size + correlated_col_indices.len() + right_schema_size), - ) - .map(|x| ColumnRefPred::new(x).into_pred_node()) - .collect(), - ), - ); - - vec![new_proj.into_plan_node().into()] + let node = match join.join_type() { + JoinType::Inner => LogicalProjection::new( + new_join.into_plan_node(), + ListPred::new( + (0..left_schema_size) + .chain( + (left_schema_size + correlated_col_indices.len()) + ..(left_schema_size + correlated_col_indices.len() + right_schema_size), + ) + .map(|x| ColumnRefPred::new(x).into_pred_node()) + .collect(), + ), + ) + .into_plan_node(), + JoinType::LeftSemi => LogicalProjection::new( + new_join.into_plan_node(), + ListPred::new( + (0..left_schema_size) + .map(|x| ColumnRefPred::new(x).into_pred_node()) + .chain([ConstantPred::bool(true).into_pred_node()]) + .collect(), + ), + ) + .into_plan_node(), + _ => unimplemented!(), + }; + + vec![node.into()] } define_rule_discriminant!( @@ -205,7 +219,7 @@ fn apply_dep_join_past_proj( ); let new_dep_join = - DependentJoin::new_unchecked(left, right, cond, extern_cols, JoinType::Cross); + DependentJoin::new_unchecked(left, right, cond, extern_cols, join.join_type()); let new_proj = LogicalProjection::new(new_dep_join.into_plan_node(), new_proj_exprs); vec![new_proj.into_plan_node().into()] @@ -267,7 +281,7 @@ fn apply_dep_join_past_filter( .map(|x| ExternColumnRefPred::new(x).into_pred_node()) .collect(), ), - JoinType::Cross, + join.join_type(), ); let new_filter = LogicalFilter::new(new_dep_join.into_plan_node(), rewritten_expr); @@ -343,7 +357,7 @@ fn apply_dep_join_past_agg( ); let new_dep_join = - DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols, JoinType::Cross); + DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols, join.join_type()); let new_agg_exprs_size = new_exprs.len(); let new_agg_groups_size = new_groups.len(); From ee74f94a028af38af9a039b5d5f1e346b2c1ee9a Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 00:04:33 -0500 Subject: [PATCH 22/47] Q4 --- optd-sqllogictest/slt/tpch-q4.slt | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 optd-sqllogictest/slt/tpch-q4.slt diff --git a/optd-sqllogictest/slt/tpch-q4.slt b/optd-sqllogictest/slt/tpch-q4.slt new file mode 100644 index 00000000..dc991161 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q4.slt @@ -0,0 +1,30 @@ +include _tpch_tables.slt.part + +query +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; +---- +1-URGENT 9 +2-HIGH 7 +3-MEDIUM 9 +4-NOT SPECIFIED 8 +5-LOW 12 From c37a3e5ac9bfc843145823a82e1eb6950db163e7 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 00:07:31 -0500 Subject: [PATCH 23/47] Update sqlplannertest plans --- .../tests/subqueries/subquery_unnesting.planner.sql | 4 ++-- optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index 73a94d8d..d1eebd0b 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -16,7 +16,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -102,7 +102,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg diff --git a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql b/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql index 9e4f5fa8..f917e394 100644 --- a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql +++ b/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql @@ -284,7 +284,7 @@ LogicalLimit { skip: 0(u64), fetch: 100(u64) } │ └── Eq │ ├── #19 │ └── #28 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalJoin { join_type: Cross, cond: true } │ │ ├── LogicalJoin { join_type: Cross, cond: true } From 2ef7682d0a448ee547dacae24e50e1453955bf02 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 00:36:33 -0500 Subject: [PATCH 24/47] Support for NOT EXISTS + simplify approach somewhat...not sure this would work with OR --- optd-datafusion-bridge/src/into_optd.rs | 18 +++++++++--------- .../src/adv_stats/join.rs | 1 + .../src/rules/subquery/depjoin_pushdown.rs | 3 +-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index a0f65387..84af5a20 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -24,6 +24,7 @@ use crate::OptdPlanContext; enum SubqueryType { Scalar, Exists, + NotExists, } impl OptdPlanContext<'_> { @@ -45,6 +46,7 @@ impl OptdPlanContext<'_> { let dep_join_type = match sq_typ { SubqueryType::Scalar => JoinType::Inner, SubqueryType::Exists => JoinType::LeftSemi, + SubqueryType::NotExists => JoinType::LeftAnti, }; let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; @@ -296,16 +298,14 @@ impl OptdPlanContext<'_> { Expr::Exists(ex) => { // We could use mark join here, if we had one... let sq = &ex.subquery; - assert!(!ex.negated, "unimplemented"); // Use anti join + let typ = if ex.negated { + SubqueryType::NotExists + } else { + SubqueryType::Exists + }; - let new_column_ref_idx = context.fields().len() + subqueries.len(); - subqueries.push((sq, SubqueryType::Exists)); - Ok(BinOpPred::new( - ColumnRefPred::new(new_column_ref_idx).into_pred_node(), - ConstantPred::bool(true).into_pred_node(), - BinOpType::Eq, - ) - .into_pred_node()) + subqueries.push((sq, typ)); + Ok(ConstantPred::bool(true).into_pred_node().into_pred_node()) } _ => bail!("Unsupported expression: {:?}", expr), } diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs index 8a23bf02..57d95c5a 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs @@ -192,6 +192,7 @@ impl< JoinType::LeftOuter => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), JoinType::RightOuter => f64::max(inner_join_selectivity, 1.0 / left_row_cnt), JoinType::LeftSemi => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), + JoinType::LeftAnti => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), JoinType::Cross => { assert!( on_col_ref_pairs.is_empty(), diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 01c8c1c9..98285f11 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -162,12 +162,11 @@ fn apply_dep_initial_distinct( ), ) .into_plan_node(), - JoinType::LeftSemi => LogicalProjection::new( + JoinType::LeftSemi | JoinType::LeftAnti => LogicalProjection::new( new_join.into_plan_node(), ListPred::new( (0..left_schema_size) .map(|x| ColumnRefPred::new(x).into_pred_node()) - .chain([ConstantPred::bool(true).into_pred_node()]) .collect(), ), ) From 50ac70031471e0cb677ea68568b78624c7d4a6e2 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 01:31:02 -0500 Subject: [PATCH 25/47] Better simulate mark join by using left outer join + more complex test --- optd-datafusion-bridge/src/from_optd.rs | 8 ++++++ optd-datafusion-bridge/src/into_optd.rs | 27 ++++++++++++++----- .../src/plan_nodes/predicates/func_pred.rs | 2 ++ .../src/rules/subquery/depjoin_pushdown.rs | 8 +++++- optd-sqllogictest/slt/unnest-exists-2.slt | 27 +++++++++++++++++++ 5 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 optd-sqllogictest/slt/unnest-exists-2.slt diff --git a/optd-datafusion-bridge/src/from_optd.rs b/optd-datafusion-bridge/src/from_optd.rs index 7fbc607b..c554b223 100644 --- a/optd-datafusion-bridge/src/from_optd.rs +++ b/optd-datafusion-bridge/src/from_optd.rs @@ -174,6 +174,14 @@ impl OptdPlanContext<'_> { Some(else_expr), )?) } + FuncType::IsNull => { + let expr = args[0].clone(); + Ok(physical_expr::expressions::is_null(expr)?) + } + FuncType::IsNotNull => { + let expr = args[0].clone(); + Ok(physical_expr::expressions::is_not_null(expr)?) + } _ => unreachable!(), } } diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 84af5a20..f83738fc 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -24,7 +24,6 @@ use crate::OptdPlanContext; enum SubqueryType { Scalar, Exists, - NotExists, } impl OptdPlanContext<'_> { @@ -45,8 +44,7 @@ impl OptdPlanContext<'_> { { let dep_join_type = match sq_typ { SubqueryType::Scalar => JoinType::Inner, - SubqueryType::Exists => JoinType::LeftSemi, - SubqueryType::NotExists => JoinType::LeftAnti, + SubqueryType::Exists => JoinType::LeftOuter, }; let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; @@ -248,6 +246,14 @@ impl OptdPlanContext<'_> { ) .into_pred_node()) } + Expr::IsNull(x) => { + let expr = self.conv_into_optd_expr(x.as_ref(), context, dep_ctx, subqueries)?; + Ok(FuncPred::new(FuncType::IsNull, ListPred::new(vec![expr])).into_pred_node()) + } + Expr::IsNotNull(x) => { + let expr = self.conv_into_optd_expr(x.as_ref(), context, dep_ctx, subqueries)?; + Ok(FuncPred::new(FuncType::IsNotNull, ListPred::new(vec![expr])).into_pred_node()) + } Expr::Sort(x) => { let expr = self.conv_into_optd_expr(x.expr.as_ref(), context, dep_ctx, subqueries)?; @@ -298,14 +304,21 @@ impl OptdPlanContext<'_> { Expr::Exists(ex) => { // We could use mark join here, if we had one... let sq = &ex.subquery; - let typ = if ex.negated { - SubqueryType::NotExists + let typ = SubqueryType::Exists; + let bin_op = if ex.negated { + BinOpType::Neq } else { - SubqueryType::Exists + BinOpType::Eq }; + let new_column_ref_idx = context.fields().len() + subqueries.len(); subqueries.push((sq, typ)); - Ok(ConstantPred::bool(true).into_pred_node().into_pred_node()) + Ok(BinOpPred::new( + ColumnRefPred::new(new_column_ref_idx).into_pred_node(), + ConstantPred::bool(true).into_pred_node(), + bin_op, + ) + .into_pred_node()) } _ => bail!("Unsupported expression: {:?}", expr), } diff --git a/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs b/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs index b61e1e53..15d7136b 100644 --- a/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs +++ b/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs @@ -14,6 +14,8 @@ pub enum FuncType { Scalar(datafusion_expr::BuiltinScalarFunction), Agg(datafusion_expr::AggregateFunction), Case, + IsNull, + IsNotNull, } impl std::fmt::Display for FuncType { diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 98285f11..bb71eb3d 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -162,11 +162,17 @@ fn apply_dep_initial_distinct( ), ) .into_plan_node(), - JoinType::LeftSemi | JoinType::LeftAnti => LogicalProjection::new( + // Simulate a left mark join + JoinType::LeftOuter => LogicalProjection::new( new_join.into_plan_node(), ListPred::new( (0..left_schema_size) .map(|x| ColumnRefPred::new(x).into_pred_node()) + .chain([FuncPred::new( + FuncType::IsNotNull, + ListPred::new(vec![ColumnRefPred::new(left_schema_size).into_pred_node()]), + ) + .into_pred_node()]) .collect(), ), ) diff --git a/optd-sqllogictest/slt/unnest-exists-2.slt b/optd-sqllogictest/slt/unnest-exists-2.slt new file mode 100644 index 00000000..f3e026a9 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-exists-2.slt @@ -0,0 +1,27 @@ +include _tpch_tables.slt.part + +query +SELECT + c.c_custkey, + c.c_name +FROM + customer c +WHERE + EXISTS ( + SELECT 1 + FROM orders o + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'O' + AND o.o_orderdate > '1998-08-01' + ) +AND NOT EXISTS ( + SELECT 1 + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'R' + AND o.o_orderdate > '1998-08-01' + AND o.o_totalprice > 5000 +); +---- +88 Customer#000000088 From b101fd7f96abca21feea7ec05f47218e6eadfda8 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 02:02:58 -0500 Subject: [PATCH 26/47] Make it more complicated & more correct + schema modifications --- optd-datafusion-bridge/src/into_optd.rs | 25 +++++++++++-- optd-datafusion-repr/src/properties/schema.rs | 25 +++++++++++-- .../src/rules/subquery/depjoin_pushdown.rs | 36 +++++++++++-------- 3 files changed, 67 insertions(+), 19 deletions(-) diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index f83738fc..2a508f1a 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -24,6 +24,7 @@ use crate::OptdPlanContext; enum SubqueryType { Scalar, Exists, + NotExists, } impl OptdPlanContext<'_> { @@ -44,7 +45,8 @@ impl OptdPlanContext<'_> { { let dep_join_type = match sq_typ { SubqueryType::Scalar => JoinType::Inner, - SubqueryType::Exists => JoinType::LeftOuter, + SubqueryType::Exists => JoinType::LeftSemi, + SubqueryType::NotExists => JoinType::LeftAnti, }; let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; @@ -304,7 +306,11 @@ impl OptdPlanContext<'_> { Expr::Exists(ex) => { // We could use mark join here, if we had one... let sq = &ex.subquery; - let typ = SubqueryType::Exists; + let typ = if ex.negated { + SubqueryType::NotExists + } else { + SubqueryType::Exists + }; let bin_op = if ex.negated { BinOpType::Neq } else { @@ -320,6 +326,21 @@ impl OptdPlanContext<'_> { ) .into_pred_node()) } + Expr::InSubquery(insq) => { + let sq = &insq.subquery; + let expr = + self.conv_into_optd_expr(insq.expr.as_ref(), context, dep_ctx, subqueries)?; + assert!(!insq.negated, "unimplemented"); + + let new_column_ref_idx = context.fields().len() + subqueries.len(); + subqueries.push((sq, SubqueryType::Scalar)); + Ok(BinOpPred::new( + expr, + ColumnRefPred::new(new_column_ref_idx).into_pred_node(), + BinOpType::Eq, + ) + .into_pred_node()) + } _ => bail!("Unsupported expression: {:?}", expr), } } diff --git a/optd-datafusion-repr/src/properties/schema.rs b/optd-datafusion-repr/src/properties/schema.rs index 932da32f..6b02d2a7 100644 --- a/optd-datafusion-repr/src/properties/schema.rs +++ b/optd-datafusion-repr/src/properties/schema.rs @@ -177,9 +177,7 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { } DfNodeType::Projection => Self::derive_for_predicate(predicates[0].clone()), DfNodeType::Filter | DfNodeType::Limit | DfNodeType::Sort => children[0].clone(), - DfNodeType::RawDepJoin(join_type) - | DfNodeType::Join(join_type) - | DfNodeType::DepJoin(join_type) => { + DfNodeType::Join(join_type) => { use crate::plan_nodes::JoinType::*; match join_type { Inner | LeftOuter | RightOuter | FullOuter | Cross => { @@ -192,6 +190,27 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { RightSemi | RightAnti => children[1].clone(), } } + DfNodeType::RawDepJoin(join_type) | DfNodeType::DepJoin(join_type) => { + use crate::plan_nodes::JoinType::*; + match join_type { + Inner => { + let mut schema = children[0].clone(); + let schema2 = children[1].clone(); + schema.fields.extend(schema2.fields); + schema + } + LeftSemi | LeftAnti => { + let mut schema = children[0].clone(); + schema.fields.push(Field { + name: "exists".to_string(), + typ: ConstantType::Bool, + nullable: false, + }); + schema + } + _ => unreachable!(), + } + } DfNodeType::EmptyRelation => decode_empty_relation_schema(&predicates[1]), x => unimplemented!("cannot derive schema property for {}", x), } diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index bb71eb3d..b0e2cc97 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -3,6 +3,8 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. +use std::iter; + use datafusion_expr::{AggregateFunction, BuiltinScalarFunction}; use optd_core::nodes::{PlanNodeOrGroup, PredNode, Value}; use optd_core::optimizer::Optimizer; @@ -163,20 +165,26 @@ fn apply_dep_initial_distinct( ) .into_plan_node(), // Simulate a left mark join - JoinType::LeftOuter => LogicalProjection::new( - new_join.into_plan_node(), - ListPred::new( - (0..left_schema_size) - .map(|x| ColumnRefPred::new(x).into_pred_node()) - .chain([FuncPred::new( - FuncType::IsNotNull, - ListPred::new(vec![ColumnRefPred::new(left_schema_size).into_pred_node()]), - ) - .into_pred_node()]) - .collect(), - ), - ) - .into_plan_node(), + JoinType::LeftSemi | JoinType::LeftAnti => { + let val = match join.join_type() { + JoinType::LeftSemi => true, + JoinType::LeftAnti => false, + _ => unreachable!(), + }; + LogicalProjection::new( + new_join.into_plan_node(), + ListPred::new( + (0..left_schema_size) + .map(|x| ColumnRefPred::new(x).into_pred_node()) + .chain( + iter::repeat(ConstantPred::bool(val).into_pred_node()) + .take(correlated_col_indices.len()), + ) + .collect(), + ), + ) + .into_plan_node() + } _ => unimplemented!(), }; From c2c908eea03936911e72bf35fc9383853737e2d0 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 02:07:14 -0500 Subject: [PATCH 27/47] Fix NULL not printing --- optd-perfbench/src/datafusion_dbms.rs | 2 +- optd-sqllogictest/slt/unnest-agg-nulls.slt | 2 +- optd-sqllogictest/src/lib.rs | 2 +- optd-sqlplannertest/src/lib.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optd-perfbench/src/datafusion_dbms.rs b/optd-perfbench/src/datafusion_dbms.rs index 2cf8e44c..0df98818 100644 --- a/optd-perfbench/src/datafusion_dbms.rs +++ b/optd-perfbench/src/datafusion_dbms.rs @@ -153,7 +153,7 @@ impl DatafusionDBMS { let batches = df.collect().await?; - let options = FormatOptions::default(); + let options = FormatOptions::default().with_null("NULL"); for batch in batches { let converters = batch diff --git a/optd-sqllogictest/slt/unnest-agg-nulls.slt b/optd-sqllogictest/slt/unnest-agg-nulls.slt index 32661fc8..538d2469 100644 --- a/optd-sqllogictest/slt/unnest-agg-nulls.slt +++ b/optd-sqllogictest/slt/unnest-agg-nulls.slt @@ -14,7 +14,7 @@ select ) as avg_v4 from t1 order by v1; ---- -1 100 +1 100 NULL 2 200 200.0 2 250 250.0 3 300 300.0 diff --git a/optd-sqllogictest/src/lib.rs b/optd-sqllogictest/src/lib.rs index 71506f57..66c0e301 100644 --- a/optd-sqllogictest/src/lib.rs +++ b/optd-sqllogictest/src/lib.rs @@ -107,7 +107,7 @@ impl DatafusionDBMS { }; let batches = df.collect().await?; - let options = FormatOptions::default(); + let options = FormatOptions::default().with_null("NULL"); for batch in batches { if types.is_empty() { diff --git a/optd-sqlplannertest/src/lib.rs b/optd-sqlplannertest/src/lib.rs index c556d9d0..3fcc74bb 100644 --- a/optd-sqlplannertest/src/lib.rs +++ b/optd-sqlplannertest/src/lib.rs @@ -183,7 +183,7 @@ impl DatafusionDBMS { let batches = df.collect().await?; - let options = FormatOptions::default(); + let options = FormatOptions::default().with_null("NULL"); for batch in batches { let converters = batch From 0d33ef345d5e9650384b4d8265af5c1089724e15 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 02:09:11 -0500 Subject: [PATCH 28/47] Add in tests --- optd-sqllogictest/slt/unnest-in-exists.slt | 20 ++++++++++++++++++++ optd-sqllogictest/slt/unnest-in.slt | 13 +++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 optd-sqllogictest/slt/unnest-in-exists.slt create mode 100644 optd-sqllogictest/slt/unnest-in.slt diff --git a/optd-sqllogictest/slt/unnest-in-exists.slt b/optd-sqllogictest/slt/unnest-in-exists.slt new file mode 100644 index 00000000..29b24b33 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-in-exists.slt @@ -0,0 +1,20 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_totalprice > 250000 +) +AND EXISTS ( + SELECT 1 + FROM orders o + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'O' + ) +order by c.c_custkey; +---- +10 Customer#000000010 +70 Customer#000000070 diff --git a/optd-sqllogictest/slt/unnest-in.slt b/optd-sqllogictest/slt/unnest-in.slt new file mode 100644 index 00000000..ad4841e5 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-in.slt @@ -0,0 +1,13 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_totalprice > 250000 +) order by c.c_custkey; +---- +10 Customer#000000010 +70 Customer#000000070 From 0fc8cb96cfd95418eb77a9197598e902a6cb07ce Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 02:26:39 -0500 Subject: [PATCH 29/47] Fix not passing all columns through (I think this was a bug?) --- optd-datafusion-repr/src/lib.rs | 2 +- .../src/rules/subquery/depjoin_pushdown.rs | 4 +- .../subqueries/subquery_unnesting.planner.sql | 182 ++++++++++++------ 3 files changed, 122 insertions(+), 66 deletions(-) diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 5a491d1d..f6848888 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -20,7 +20,7 @@ pub use optd_core::nodes::Value; use optd_core::optimizer::Optimizer; use optd_core::rules::Rule; pub use optimizer_ext::OptimizerExt; -use plan_nodes::{ArcDfPlanNode, DfNodeType}; +use plan_nodes::{ArcDfPlanNode, DfNodeType, DfReprPlanNode}; use properties::column_ref::ColumnRefPropertyBuilder; use properties::schema::{Catalog, SchemaPropertyBuilder}; diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index dd0127b9..41ad768b 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -391,9 +391,7 @@ fn apply_dep_join_past_agg( new_outer_join.into_plan_node(), ListPred::new( (0..left_schema_size) - .chain( - left_schema_size + new_agg_groups_size..left_schema_size + new_agg_schema_size, - ) + .chain(left_schema_size + left_schema_size..left_schema_size + new_agg_schema_size) .map(|x| { // Count(*) special case: We want all NULLs to be transformed into 0s. if x >= left_schema_size + new_agg_groups_size { diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index 77bb5cd8..471f28a9 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -132,48 +132,87 @@ LogicalProjection { exprs: [ #0, #1 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ #2 ] - ├── groups: [ #0 ] - └── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } └── LogicalAgg ├── exprs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0 ] + └── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalProjection { exprs: [ #0, #2, #3 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0, #1 ] + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── #0 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalScan { table: t2 } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228003,io=5000}, stat: {row_cnt=1} } +└── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=44228000,io=5000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=44225000,io=5000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=44123000,io=4000} + ├── stat: {row_cnt=100000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0 ] + ├── cost: {compute=4119000,io=3000} + ├── stat: {row_cnt=10000} + └── PhysicalProjection { exprs: [ #0, #2, #3 ], cost: {compute=4059000,io=3000}, stat: {row_cnt=10000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4019000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) │ └── [ Cast { cast_to: Int64, child: #2 } ] ├── groups: [ #0, #1 ] - └── LogicalFilter - ├── cond:Eq - │ ├── #1 - │ └── #0 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=25005,io=3000}, stat: {row_cnt=1} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=25002,io=3000}, stat: {row_cnt=1} } - ├── PhysicalFilter - │ ├── cond:Gt - │ │ ├── #1 - │ │ └── 100(i64) - │ ├── cost: {compute=24000,io=2000} - │ ├── stat: {row_cnt=1} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ #2 ] - │ ├── groups: [ #0 ] - │ ├── cost: {compute=21000,io=2000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #0, #1 ] - │ ├── cost: {compute=15000,io=2000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + ├── cost: {compute=15000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries with scalar agg in select list @@ -202,31 +241,50 @@ LogicalProjection { exprs: [ #0, #2 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - └── LogicalFilter - ├── cond:Eq - │ ├── #1 - │ └── #0 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #0, #3 ], cost: {compute=20000,io=3000}, stat: {row_cnt=1000} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=17000,io=3000}, stat: {row_cnt=1000} } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── #0 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalScan { table: t2 } +PhysicalProjection { exprs: [ #0, #4 ], cost: {compute=4033000,io=4000}, stat: {row_cnt=1000} } +└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - ├── cost: {compute=14000,io=2000} - ├── stat: {row_cnt=1000} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4018000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=14000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries. From f94650aee22a5dc55643b389d763fc4a764e3255 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 02:28:49 -0500 Subject: [PATCH 30/47] Update planner tests --- .../tests/subqueries/subquery_unnesting.planner.sql | 4 ++-- optd-sqlplannertest/tests/tpch/q11.planner.sql | 2 +- optd-sqlplannertest/tests/tpch/q15.planner.sql | 2 +- optd-sqlplannertest/tests/tpch/q17.planner.sql | 2 +- optd-sqlplannertest/tests/tpch/q2.planner.sql | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index 924fdc6c..2daa9e1a 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -102,7 +102,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -220,7 +220,7 @@ select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1; /* LogicalProjection { exprs: [ #0, #2 ] } -└── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } +└── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg diff --git a/optd-sqlplannertest/tests/tpch/q11.planner.sql b/optd-sqlplannertest/tests/tpch/q11.planner.sql index 8c0fdf39..6b5d0af9 100644 --- a/optd-sqlplannertest/tests/tpch/q11.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q11.planner.sql @@ -36,7 +36,7 @@ LogicalSort ├── cond:Gt │ ├── Cast { cast_to: Decimal128(38, 15), child: #1 } │ └── #2 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [] } ├── LogicalAgg │ ├── exprs:Agg(Sum) │ │ └── Mul diff --git a/optd-sqlplannertest/tests/tpch/q15.planner.sql b/optd-sqlplannertest/tests/tpch/q15.planner.sql index 0b50b7fb..4273cf92 100644 --- a/optd-sqlplannertest/tests/tpch/q15.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q15.planner.sql @@ -46,7 +46,7 @@ LogicalSort │ └── Eq │ ├── #8 │ └── #9 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalScan { table: supplier } │ └── LogicalProjection { exprs: [ #0, #1 ] } diff --git a/optd-sqlplannertest/tests/tpch/q17.planner.sql b/optd-sqlplannertest/tests/tpch/q17.planner.sql index 72de706f..ab174a81 100644 --- a/optd-sqlplannertest/tests/tpch/q17.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q17.planner.sql @@ -43,7 +43,7 @@ LogicalProjection │ └── Lt │ ├── Cast { cast_to: Decimal128(30, 15), child: #4 } │ └── #25 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#16) ] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#16) ] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalScan { table: lineitem } │ └── LogicalScan { table: part } diff --git a/optd-sqlplannertest/tests/tpch/q2.planner.sql b/optd-sqlplannertest/tests/tpch/q2.planner.sql index 113e3468..3a095fb6 100644 --- a/optd-sqlplannertest/tests/tpch/q2.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q2.planner.sql @@ -81,7 +81,7 @@ LogicalLimit { skip: 0(u64), fetch: 100(u64) } │ └── Eq │ ├── #19 │ └── #28 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalJoin { join_type: Cross, cond: true } │ │ ├── LogicalJoin { join_type: Cross, cond: true } From 1f0a7e749dc8127800f979eb7fc4d5332f1b6a42 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sat, 7 Dec 2024 02:46:58 -0500 Subject: [PATCH 31/47] Q4 is working --- optd-sqllogictest/slt/tpch-q4.slt.disabled | 30 ---------------------- 1 file changed, 30 deletions(-) delete mode 100644 optd-sqllogictest/slt/tpch-q4.slt.disabled diff --git a/optd-sqllogictest/slt/tpch-q4.slt.disabled b/optd-sqllogictest/slt/tpch-q4.slt.disabled deleted file mode 100644 index dc991161..00000000 --- a/optd-sqllogictest/slt/tpch-q4.slt.disabled +++ /dev/null @@ -1,30 +0,0 @@ -include _tpch_tables.slt.part - -query -select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and exists ( - select - * - from - lineitem - where - l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority; ----- -1-URGENT 9 -2-HIGH 7 -3-MEDIUM 9 -4-NOT SPECIFIED 8 -5-LOW 12 From d5a6a475313b497969a5c886b72d6b2bec572900 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 18:19:40 -0500 Subject: [PATCH 32/47] mark join support --- optd-datafusion-bridge/src/from_optd.rs | 1 + optd-datafusion-bridge/src/into_optd.rs | 19 +++++-------------- optd-datafusion-repr/src/plan_nodes/join.rs | 1 + optd-datafusion-repr/src/properties/schema.rs | 18 ++++-------------- 4 files changed, 11 insertions(+), 28 deletions(-) diff --git a/optd-datafusion-bridge/src/from_optd.rs b/optd-datafusion-bridge/src/from_optd.rs index 832ec65c..6457579e 100644 --- a/optd-datafusion-bridge/src/from_optd.rs +++ b/optd-datafusion-bridge/src/from_optd.rs @@ -486,6 +486,7 @@ impl OptdPlanContext<'_> { JoinType::RightSemi => datafusion_expr::JoinType::RightSemi, JoinType::LeftAnti => datafusion_expr::JoinType::LeftAnti, JoinType::RightAnti => datafusion_expr::JoinType::RightAnti, + JoinType::LeftMark => datafusion_expr::JoinType::LeftMark, _ => unimplemented!(), }; diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 73678a72..8a2c08f2 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -25,7 +25,7 @@ use crate::OptdPlanContext; enum SubqueryType { Scalar, Exists, - NotExists, + Any, } impl OptdPlanContext<'_> { @@ -47,7 +47,7 @@ impl OptdPlanContext<'_> { let dep_join_type = match sq_typ { SubqueryType::Scalar => JoinType::Inner, SubqueryType::Exists => JoinType::LeftSemi, - SubqueryType::NotExists => JoinType::LeftAnti, + _ => unimplemented!(), }; let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; @@ -318,23 +318,13 @@ impl OptdPlanContext<'_> { Expr::Exists(ex) => { // We could use mark join here, if we had one... let sq = &ex.subquery; - let typ = if ex.negated { - SubqueryType::NotExists - } else { - SubqueryType::Exists - }; - let bin_op = if ex.negated { - BinOpType::Neq - } else { - BinOpType::Eq - }; let new_column_ref_idx = context.fields().len() + subqueries.len(); - subqueries.push((sq, typ)); + subqueries.push((sq, SubqueryType::Exists)); Ok(BinOpPred::new( ColumnRefPred::new(new_column_ref_idx).into_pred_node(), ConstantPred::bool(true).into_pred_node(), - bin_op, + BinOpType::Eq, ) .into_pred_node()) } @@ -515,6 +505,7 @@ impl OptdPlanContext<'_> { DFJoinType::RightAnti => JoinType::RightAnti, DFJoinType::LeftSemi => JoinType::LeftSemi, DFJoinType::RightSemi => JoinType::RightSemi, + DFJoinType::LeftMark => JoinType::LeftMark, _ => unimplemented!(), }; let mut log_ops = Vec::with_capacity(node.on.len()); diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index 84dbc033..d506449f 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -20,6 +20,7 @@ pub enum JoinType { RightSemi, LeftAnti, RightAnti, + LeftMark, } impl Display for JoinType { diff --git a/optd-datafusion-repr/src/properties/schema.rs b/optd-datafusion-repr/src/properties/schema.rs index 6b02d2a7..5406eaf4 100644 --- a/optd-datafusion-repr/src/properties/schema.rs +++ b/optd-datafusion-repr/src/properties/schema.rs @@ -177,7 +177,9 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { } DfNodeType::Projection => Self::derive_for_predicate(predicates[0].clone()), DfNodeType::Filter | DfNodeType::Limit | DfNodeType::Sort => children[0].clone(), - DfNodeType::Join(join_type) => { + DfNodeType::Join(join_type) + | DfNodeType::RawDepJoin(join_type) + | DfNodeType::DepJoin(join_type) => { use crate::plan_nodes::JoinType::*; match join_type { Inner | LeftOuter | RightOuter | FullOuter | Cross => { @@ -188,18 +190,7 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { } LeftSemi | LeftAnti => children[0].clone(), RightSemi | RightAnti => children[1].clone(), - } - } - DfNodeType::RawDepJoin(join_type) | DfNodeType::DepJoin(join_type) => { - use crate::plan_nodes::JoinType::*; - match join_type { - Inner => { - let mut schema = children[0].clone(); - let schema2 = children[1].clone(); - schema.fields.extend(schema2.fields); - schema - } - LeftSemi | LeftAnti => { + LeftMark => { let mut schema = children[0].clone(); schema.fields.push(Field { name: "exists".to_string(), @@ -208,7 +199,6 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { }); schema } - _ => unreachable!(), } } DfNodeType::EmptyRelation => decode_empty_relation_schema(&predicates[1]), From 5f874734bae14fd23131879c9db2894e66b6fd7b Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 18:57:37 -0500 Subject: [PATCH 33/47] Implement exists w/ mark join --- optd-datafusion-bridge/src/from_optd.rs | 4 +++ optd-datafusion-bridge/src/into_optd.rs | 24 ++++++++++----- .../src/adv_stats/filter.rs | 12 ++++++-- .../src/adv_stats/join.rs | 4 +-- optd-datafusion-repr/src/lib.rs | 2 +- .../src/plan_nodes/predicates/func_pred.rs | 1 + .../src/rules/subquery/depjoin_pushdown.rs | 30 ++++--------------- 7 files changed, 39 insertions(+), 38 deletions(-) diff --git a/optd-datafusion-bridge/src/from_optd.rs b/optd-datafusion-bridge/src/from_optd.rs index 6457579e..e67e97b1 100644 --- a/optd-datafusion-bridge/src/from_optd.rs +++ b/optd-datafusion-bridge/src/from_optd.rs @@ -195,6 +195,10 @@ impl OptdPlanContext<'_> { Some(else_expr), )?) } + FuncType::Not => { + let expr = args[0].clone(); + Ok(physical_expr::expressions::not(expr)?) + } FuncType::IsNull => { let expr = args[0].clone(); Ok(physical_expr::expressions::is_null(expr)?) diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 8a2c08f2..369605e9 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -46,7 +46,7 @@ impl OptdPlanContext<'_> { { let dep_join_type = match sq_typ { SubqueryType::Scalar => JoinType::Inner, - SubqueryType::Exists => JoinType::LeftSemi, + SubqueryType::Exists => JoinType::LeftMark, _ => unimplemented!(), }; @@ -273,6 +273,10 @@ impl OptdPlanContext<'_> { ) .into_pred_node()) } + Expr::Not(x) => { + let expr = self.conv_into_optd_expr(x.as_ref(), context, dep_ctx, subqueries)?; + Ok(FuncPred::new(FuncType::Not, ListPred::new(vec![expr])).into_pred_node()) + } Expr::IsNull(x) => { let expr = self.conv_into_optd_expr(x.as_ref(), context, dep_ctx, subqueries)?; Ok(FuncPred::new(FuncType::IsNull, ListPred::new(vec![expr])).into_pred_node()) @@ -318,15 +322,21 @@ impl OptdPlanContext<'_> { Expr::Exists(ex) => { // We could use mark join here, if we had one... let sq = &ex.subquery; + let negated = ex.negated; let new_column_ref_idx = context.fields().len() + subqueries.len(); subqueries.push((sq, SubqueryType::Exists)); - Ok(BinOpPred::new( - ColumnRefPred::new(new_column_ref_idx).into_pred_node(), - ConstantPred::bool(true).into_pred_node(), - BinOpType::Eq, - ) - .into_pred_node()) + if negated { + Ok(FuncPred::new( + FuncType::Not, + ListPred::new( + vec![ColumnRefPred::new(new_column_ref_idx).into_pred_node()], + ), + ) + .into_pred_node()) + } else { + Ok(ColumnRefPred::new(new_column_ref_idx).into_pred_node()) + } } Expr::InSubquery(insq) => { let sq = &insq.subquery; diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs index 5850d59c..32dd6be8 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs @@ -12,7 +12,7 @@ use optd_datafusion_repr::plan_nodes::{ use optd_datafusion_repr::properties::column_ref::{ BaseTableColumnRef, BaseTableColumnRefs, ColumnRef, GroupColumnRefs, }; -use optd_datafusion_repr::properties::schema::Schema; +use optd_datafusion_repr::properties::schema::{Field, Schema}; use optd_datafusion_repr::Value; use serde::de::DeserializeOwned; use serde::Serialize; @@ -66,7 +66,10 @@ impl< ) -> f64 { match &expr_tree.typ { DfPredType::Constant(_) => Self::get_constant_selectivity(expr_tree), - DfPredType::ColumnRef => unimplemented!("check bool type or else panic"), + DfPredType::ColumnRef => { + // TODO: Check that field is of bool type + 0.5 // TODO: placeholder---how can we get the selectivity? + } DfPredType::UnOp(un_op_typ) => { assert!(expr_tree.children.len() == 1); let child = expr_tree.child(0); @@ -104,7 +107,10 @@ impl< DfPredType::LogOp(log_op_typ) => { self.get_log_op_selectivity(*log_op_typ, &expr_tree.children, schema, column_refs) } - DfPredType::Func(_) => unimplemented!("check bool type or else panic"), + DfPredType::Func(_) => { + // TODO: Check that field is of bool type + 0.5 // TODO: placeholder---how can we get the selectivity? + } DfPredType::SortOrder(_) => { panic!("the selectivity of sort order expressions is undefined") } diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs index 57d95c5a..0f0053e0 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs @@ -191,8 +191,6 @@ impl< JoinType::Inner => inner_join_selectivity, JoinType::LeftOuter => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), JoinType::RightOuter => f64::max(inner_join_selectivity, 1.0 / left_row_cnt), - JoinType::LeftSemi => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), - JoinType::LeftAnti => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), JoinType::Cross => { assert!( on_col_ref_pairs.is_empty(), @@ -200,6 +198,8 @@ impl< ); join_filter_selectivity } + // TODO: Does this make sense? + JoinType::LeftMark => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), _ => unimplemented!("join_typ={} is not implemented", join_typ), } } diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 5a491d1d..f6848888 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -20,7 +20,7 @@ pub use optd_core::nodes::Value; use optd_core::optimizer::Optimizer; use optd_core::rules::Rule; pub use optimizer_ext::OptimizerExt; -use plan_nodes::{ArcDfPlanNode, DfNodeType}; +use plan_nodes::{ArcDfPlanNode, DfNodeType, DfReprPlanNode}; use properties::column_ref::ColumnRefPropertyBuilder; use properties::schema::{Catalog, SchemaPropertyBuilder}; diff --git a/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs b/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs index cc1a4d82..ccda1205 100644 --- a/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs +++ b/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs @@ -15,6 +15,7 @@ pub enum FuncType { Scalar(String, DataType), Agg(String), Case, + Not, IsNull, IsNotNull, } diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 3c30e31e..3f7f670f 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -149,8 +149,8 @@ fn apply_dep_initial_distinct( // Ensure that the schema above the new_join is the same as it was before // for correctness (Project the left side of the new join, // plus the *right side of the right side*) - let node = match join.join_type() { - JoinType::Inner => LogicalProjection::new( + let node = if matches!(join.join_type(), JoinType::Inner) { + LogicalProjection::new( new_join.into_plan_node(), ListPred::new( (0..left_schema_size) @@ -162,29 +162,9 @@ fn apply_dep_initial_distinct( .collect(), ), ) - .into_plan_node(), - // Simulate a left mark join - JoinType::LeftSemi | JoinType::LeftAnti => { - let val = match join.join_type() { - JoinType::LeftSemi => true, - JoinType::LeftAnti => false, - _ => unreachable!(), - }; - LogicalProjection::new( - new_join.into_plan_node(), - ListPred::new( - (0..left_schema_size) - .map(|x| ColumnRefPred::new(x).into_pred_node()) - .chain( - iter::repeat(ConstantPred::bool(val).into_pred_node()) - .take(correlated_col_indices.len()), - ) - .collect(), - ), - ) - .into_plan_node() - } - _ => unimplemented!(), + .into_plan_node() + } else { + new_join.into_plan_node() }; vec![node.into()] From fa3e99c23c835f49004eddf20b2c9aceec931053 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 19:30:44 -0500 Subject: [PATCH 34/47] maybe more correct IN? --- optd-datafusion-bridge/src/into_optd.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 369605e9..22ea4c0b 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -46,8 +46,7 @@ impl OptdPlanContext<'_> { { let dep_join_type = match sq_typ { SubqueryType::Scalar => JoinType::Inner, - SubqueryType::Exists => JoinType::LeftMark, - _ => unimplemented!(), + SubqueryType::Exists | SubqueryType::Any => JoinType::LeftMark, }; let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; @@ -345,7 +344,7 @@ impl OptdPlanContext<'_> { assert!(!insq.negated, "unimplemented"); let new_column_ref_idx = context.fields().len() + subqueries.len(); - subqueries.push((sq, SubqueryType::Scalar)); + subqueries.push((sq, SubqueryType::Any)); Ok(BinOpPred::new( expr, ColumnRefPred::new(new_column_ref_idx).into_pred_node(), @@ -516,7 +515,6 @@ impl OptdPlanContext<'_> { DFJoinType::LeftSemi => JoinType::LeftSemi, DFJoinType::RightSemi => JoinType::RightSemi, DFJoinType::LeftMark => JoinType::LeftMark, - _ => unimplemented!(), }; let mut log_ops = Vec::with_capacity(node.on.len()); let mut subqueries = vec![]; From d75394597f0b04a6093db0536226206a18fae63a Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 20:46:49 -0500 Subject: [PATCH 35/47] Handle correlated IN (hopefully) properly, now --- optd-datafusion-bridge/src/from_optd.rs | 2 +- optd-datafusion-bridge/src/into_optd.rs | 53 ++++----- .../src/adv_stats/filter.rs | 2 +- optd-datafusion-repr/src/explain.rs | 2 +- optd-datafusion-repr/src/lib.rs | 2 +- optd-datafusion-repr/src/memo_ext.rs | 4 +- optd-datafusion-repr/src/plan_nodes.rs | 8 +- optd-datafusion-repr/src/plan_nodes/macros.rs | 8 +- .../src/plan_nodes/subquery.rs | 26 ++++- .../src/properties/column_ref.rs | 17 ++- optd-datafusion-repr/src/properties/schema.rs | 17 ++- .../src/rules/filter_pushdown.rs | 2 +- .../src/rules/subquery/depjoin_pushdown.rs | 105 ++++++++++-------- optd-sqllogictest/slt/unnest-in-uncor.slt | 13 +++ optd-sqllogictest/slt/unnest-in.slt | 5 +- optd-sqllogictest/slt/unnest-not-in-uncor.slt | 13 +++ 16 files changed, 180 insertions(+), 99 deletions(-) create mode 100644 optd-sqllogictest/slt/unnest-in-uncor.slt create mode 100644 optd-sqllogictest/slt/unnest-not-in-uncor.slt diff --git a/optd-datafusion-bridge/src/from_optd.rs b/optd-datafusion-bridge/src/from_optd.rs index e67e97b1..6c4b47dd 100644 --- a/optd-datafusion-bridge/src/from_optd.rs +++ b/optd-datafusion-bridge/src/from_optd.rs @@ -476,7 +476,7 @@ impl OptdPlanContext<'_> { let physical_expr = self.conv_from_optd_expr(node.cond(), &Arc::new(filter_schema.clone()))?; - if node.join_type() == JoinType::Cross { + if *node.join_type() == JoinType::Cross { return Ok(Arc::new(CrossJoinExec::new(left_exec, right_exec)) as Arc); } diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 22ea4c0b..155de062 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -3,6 +3,8 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. +use std::sync::Arc; + use anyhow::{bail, Result}; use datafusion::common::DFSchema; use datafusion::logical_expr::{self, logical_plan, LogicalPlan, Operator}; @@ -15,23 +17,16 @@ use optd_datafusion_repr::plan_nodes::{ ConstantPred, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, FuncPred, FuncType, InListPred, JoinType, LikePred, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalEmptyRelation, LogicalFilter, LogicalJoin, LogicalLimit, LogicalProjection, LogicalScan, - LogicalSort, RawDependentJoin, SortOrderPred, SortOrderType, + LogicalSort, RawDependentJoin, SortOrderPred, SortOrderType, SubqueryType, }; use optd_datafusion_repr::properties::schema::Schema as OptdSchema; use crate::OptdPlanContext; -#[derive(Debug, Clone, Copy)] -enum SubqueryType { - Scalar, - Exists, - Any, -} - impl OptdPlanContext<'_> { fn subqueries_to_dependent_joins( &mut self, - subqueries: &[(&Subquery, SubqueryType)], + subqueries: Vec<(&Subquery, SubqueryType)>, input: ArcDfPlanNode, input_schema: &DFSchema, ) -> Result { @@ -42,13 +37,8 @@ impl OptdPlanContext<'_> { outer_ref_columns, }, sq_typ, - ) in subqueries.iter() + ) in subqueries.into_iter() { - let dep_join_type = match sq_typ { - SubqueryType::Scalar => JoinType::Inner, - SubqueryType::Exists | SubqueryType::Any => JoinType::LeftMark, - }; - let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; let dep_join = RawDependentJoin::new( node, @@ -71,7 +61,7 @@ impl OptdPlanContext<'_> { }) .collect(), ), - dep_join_type, + sq_typ, ); node = dep_join.into_plan_node(); } @@ -341,16 +331,27 @@ impl OptdPlanContext<'_> { let sq = &insq.subquery; let expr = self.conv_into_optd_expr(insq.expr.as_ref(), context, dep_ctx, subqueries)?; - assert!(!insq.negated, "unimplemented"); + let negated = insq.negated; let new_column_ref_idx = context.fields().len() + subqueries.len(); - subqueries.push((sq, SubqueryType::Any)); - Ok(BinOpPred::new( - expr, - ColumnRefPred::new(new_column_ref_idx).into_pred_node(), - BinOpType::Eq, - ) - .into_pred_node()) + subqueries.push(( + sq, + SubqueryType::Any { + pred: Arc::unwrap_or_clone(expr), + op: BinOpType::Eq, + }, + )); + if negated { + Ok(FuncPred::new( + FuncType::Not, + ListPred::new( + vec![ColumnRefPred::new(new_column_ref_idx).into_pred_node()], + ), + ) + .into_pred_node()) + } else { + Ok(ColumnRefPred::new(new_column_ref_idx).into_pred_node()) + } } _ => bail!("Unsupported expression: {:?}", expr), } @@ -369,7 +370,7 @@ impl OptdPlanContext<'_> { dep_ctx, &mut subqueries, )?; - let input = self.subqueries_to_dependent_joins(&subqueries, input, node.input.schema())?; + let input = self.subqueries_to_dependent_joins(subqueries, input, node.input.schema())?; Ok(LogicalProjection::new(input, expr_list)) } @@ -387,7 +388,7 @@ impl OptdPlanContext<'_> { &mut subqueries, )?; - let input = self.subqueries_to_dependent_joins(&subqueries, input, node.input.schema())?; + let input = self.subqueries_to_dependent_joins(subqueries, input, node.input.schema())?; Ok(LogicalFilter::new(input, expr)) } diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs index 32dd6be8..a3d2ab6f 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs @@ -12,7 +12,7 @@ use optd_datafusion_repr::plan_nodes::{ use optd_datafusion_repr::properties::column_ref::{ BaseTableColumnRef, BaseTableColumnRefs, ColumnRef, GroupColumnRefs, }; -use optd_datafusion_repr::properties::schema::{Field, Schema}; +use optd_datafusion_repr::properties::schema::Schema; use optd_datafusion_repr::Value; use serde::de::DeserializeOwned; use serde::Serialize; diff --git a/optd-datafusion-repr/src/explain.rs b/optd-datafusion-repr/src/explain.rs index d7eaa686..068353b3 100644 --- a/optd-datafusion-repr/src/explain.rs +++ b/optd-datafusion-repr/src/explain.rs @@ -75,7 +75,7 @@ pub fn explain_plan_node( DfNodeType::RawDepJoin(_) => RawDependentJoin::from_plan_node(node) .unwrap() .explain(meta_map), - DfNodeType::DepJoin(_) => DependentJoin::from_plan_node(node) + DfNodeType::DepJoin => DependentJoin::from_plan_node(node) .unwrap() .explain(meta_map), DfNodeType::Scan => LogicalScan::from_plan_node(node).unwrap().explain(meta_map), diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index f6848888..5a491d1d 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -20,7 +20,7 @@ pub use optd_core::nodes::Value; use optd_core::optimizer::Optimizer; use optd_core::rules::Rule; pub use optimizer_ext::OptimizerExt; -use plan_nodes::{ArcDfPlanNode, DfNodeType, DfReprPlanNode}; +use plan_nodes::{ArcDfPlanNode, DfNodeType}; use properties::column_ref::ColumnRefPropertyBuilder; use properties::schema::{Catalog, SchemaPropertyBuilder}; diff --git a/optd-datafusion-repr/src/memo_ext.rs b/optd-datafusion-repr/src/memo_ext.rs index 642dd711..3c0f7bb5 100644 --- a/optd-datafusion-repr/src/memo_ext.rs +++ b/optd-datafusion-repr/src/memo_ext.rs @@ -41,7 +41,7 @@ fn enumerate_join_order_expr_inner + ?Sized>( visited: &mut HashMap>, ) -> Vec { let expr = memo.get_expr_memoed(current); - match expr.typ { + match &expr.typ { DfNodeType::Scan => { let table = memo.get_pred(expr.predicates[0]); // TODO: use unified repr let table = ConstantPred::from_pred_node(table) @@ -50,7 +50,7 @@ fn enumerate_join_order_expr_inner + ?Sized>( .as_str(); vec![LogicalJoinOrder::Table(table)] } - DfNodeType::Join(_) | DfNodeType::DepJoin(_) | DfNodeType::RawDepJoin(_) => { + DfNodeType::Join(_) | DfNodeType::DepJoin | DfNodeType::RawDepJoin(_) => { // Assume child 0 == left, child 1 == right let left = expr.children[0]; let right = expr.children[1]; diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index 72f1a766..e986f313 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -39,7 +39,7 @@ use pretty_xmlish::{Pretty, PrettyConfig}; pub use projection::{LogicalProjection, PhysicalProjection}; pub use scan::{LogicalScan, PhysicalScan}; pub use sort::{LogicalSort, PhysicalSort}; -pub use subquery::{DependentJoin, RawDependentJoin}; // Add missing import +pub use subquery::{DependentJoin, RawDependentJoin, SubqueryType}; use crate::explain::{explain_plan_node, explain_pred_node}; @@ -69,7 +69,7 @@ impl std::fmt::Display for DfPredType { /// DfNodeType FAQ: /// - The define_plan_node!() macro defines what the children of each join node are -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum DfNodeType { // Developers: update `is_logical` function after adding new plan nodes // Plan nodes @@ -77,8 +77,8 @@ pub enum DfNodeType { Filter, Scan, Join(JoinType), - RawDepJoin(JoinType), - DepJoin(JoinType), + RawDepJoin(SubqueryType), + DepJoin, Sort, Agg, EmptyRelation, diff --git a/optd-datafusion-repr/src/plan_nodes/macros.rs b/optd-datafusion-repr/src/plan_nodes/macros.rs index a674ec1e..232e60d7 100644 --- a/optd-datafusion-repr/src/plan_nodes/macros.rs +++ b/optd-datafusion-repr/src/plan_nodes/macros.rs @@ -18,7 +18,7 @@ macro_rules! define_plan_node { fn from_plan_node(plan_node: ArcDfPlanNode) -> Option { #[allow(unused_variables)] - if let DfNodeType :: $variant $( ($inner_name) )? = plan_node.typ { + if let DfNodeType :: $variant $( ($inner_name) )? = &plan_node.typ { Some(Self(plan_node)) } else { None @@ -105,9 +105,9 @@ macro_rules! define_plan_node { )* $( - pub fn $inner_name(&self) -> JoinType { - if let DfNodeType :: $variant ($inner_name) = self.0 .typ { - return $inner_name; + pub fn $inner_name(&self) -> &$inner_typ { + if let DfNodeType :: $variant ($inner_name) = &self.0.typ { + return &$inner_name; } else { unreachable!(); } diff --git a/optd-datafusion-repr/src/plan_nodes/subquery.rs b/optd-datafusion-repr/src/plan_nodes/subquery.rs index abd400bc..a0c8d6c6 100644 --- a/optd-datafusion-repr/src/plan_nodes/subquery.rs +++ b/optd-datafusion-repr/src/plan_nodes/subquery.rs @@ -3,11 +3,31 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. +use core::fmt; +use std::fmt::Display; + use super::macros::define_plan_node; use super::{ - ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode, JoinType, ListPred, + ArcDfPlanNode, ArcDfPredNode, BinOpType, DfNodeType, DfPlanNode, DfPredNode, DfReprPlanNode, + ListPred, }; +/// These are the only three fundamental types of subqueries. +/// Refer to the Unnesting Arbitrary Queries talk by Mark Raasveldt for +/// info on how to translate other subquery types to these three. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum SubqueryType { + Scalar, + Exists, + Any { pred: DfPredNode, op: BinOpType }, +} + +impl Display for SubqueryType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + #[derive(Clone, Debug)] pub struct RawDependentJoin(pub ArcDfPlanNode); @@ -19,7 +39,7 @@ define_plan_node!( ], [ { 0, cond: ArcDfPredNode }, { 1, extern_cols: ListPred } - ], { join_type: JoinType } + ], { sq_type: SubqueryType } ); #[derive(Clone, Debug)] @@ -33,5 +53,5 @@ define_plan_node!( ], [ { 0, cond: ArcDfPredNode }, { 1, extern_cols: ListPred } - ], { join_type: JoinType } + ] ); diff --git a/optd-datafusion-repr/src/properties/column_ref.rs b/optd-datafusion-repr/src/properties/column_ref.rs index 22fb602b..7846f2b7 100644 --- a/optd-datafusion-repr/src/properties/column_ref.rs +++ b/optd-datafusion-repr/src/properties/column_ref.rs @@ -16,7 +16,7 @@ use super::DEFAULT_NAME; use crate::{ plan_nodes::{ decode_empty_relation_schema, ArcDfPredNode, BinOpType, ConstantPred, DfNodeType, - DfPredType, DfReprPredNode, JoinType, LogOpType, + DfPredType, DfReprPredNode, JoinType, LogOpType, SubqueryType, }, utils::DisjointSets, }; @@ -451,9 +451,7 @@ impl LogicalPropertyBuilder for ColumnRefPropertyBuilder { GroupColumnRefs::new(column_refs, child.output_correlation.clone()) } // Should account for all physical join types. - DfNodeType::Join(join_type) - | DfNodeType::RawDepJoin(join_type) - | DfNodeType::DepJoin(join_type) => { + DfNodeType::Join(join_type) => { // Concatenate left and right children column refs. let column_refs = Self::concat_children_col_refs(&children[0..2]); // Merge the equal columns of two children as input correlation. @@ -502,6 +500,17 @@ impl LogicalPropertyBuilder for ColumnRefPropertyBuilder { }; GroupColumnRefs::new(column_refs, output_correlation) } + DfNodeType::RawDepJoin(sq_type) => match sq_type { + SubqueryType::Scalar => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } + SubqueryType::Exists | SubqueryType::Any { pred: _, op: _ } => { + self.derive(DfNodeType::Join(JoinType::LeftMark), predicates, children) + } + }, + DfNodeType::DepJoin => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } DfNodeType::Agg => { let child = children[0]; // Group by columns first. diff --git a/optd-datafusion-repr/src/properties/schema.rs b/optd-datafusion-repr/src/properties/schema.rs index 5406eaf4..de9d2261 100644 --- a/optd-datafusion-repr/src/properties/schema.rs +++ b/optd-datafusion-repr/src/properties/schema.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; use super::DEFAULT_NAME; use crate::plan_nodes::{ decode_empty_relation_schema, ArcDfPredNode, ConstantPred, ConstantType, DfNodeType, - DfPredType, DfReprPredNode, FuncType, + DfPredType, DfReprPredNode, FuncType, JoinType, SubqueryType, }; #[derive(Clone, Debug, Serialize, Deserialize)] @@ -177,9 +177,7 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { } DfNodeType::Projection => Self::derive_for_predicate(predicates[0].clone()), DfNodeType::Filter | DfNodeType::Limit | DfNodeType::Sort => children[0].clone(), - DfNodeType::Join(join_type) - | DfNodeType::RawDepJoin(join_type) - | DfNodeType::DepJoin(join_type) => { + DfNodeType::Join(join_type) => { use crate::plan_nodes::JoinType::*; match join_type { Inner | LeftOuter | RightOuter | FullOuter | Cross => { @@ -201,6 +199,17 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { } } } + DfNodeType::RawDepJoin(sq_type) => match sq_type { + SubqueryType::Scalar => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } + SubqueryType::Exists | SubqueryType::Any { pred: _, op: _ } => { + self.derive(DfNodeType::Join(JoinType::LeftMark), predicates, children) + } + }, + DfNodeType::DepJoin => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } DfNodeType::EmptyRelation => decode_empty_relation_schema(&predicates[1]), x => unimplemented!("cannot derive schema property for {}", x), } diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index c576b844..439bd28a 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -270,7 +270,7 @@ fn filter_join_transpose( } _ => { // We don't support modifying the join condition for other join types yet - LogicalJoin::new_unchecked(new_left, new_right, join_cond, join_typ) + LogicalJoin::new_unchecked(new_left, new_right, join_cond, *join_typ) } }; diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 3f7f670f..5d7d610f 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -3,8 +3,6 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. -use std::iter; - use optd_core::nodes::{PlanNodeOrGroup, PredNode}; use optd_core::optimizer::Optimizer; use optd_core::rules::{Rule, RuleMatcher}; @@ -13,9 +11,9 @@ use crate::plan_nodes::{ ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, DependentJoin, DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, FuncPred, FuncType, JoinType, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, - LogicalProjection, PredExt, RawDependentJoin, + LogicalProjection, PredExt, RawDependentJoin, SubqueryType, }; -use crate::rules::macros::define_rule_discriminant; +use crate::rules::macros::{define_rule, define_rule_discriminant}; use crate::OptimizerExt; /// Like rewrite_column_refs, except it translates ExternColumnRefs into ColumnRefs @@ -52,7 +50,7 @@ fn rewrite_extern_column_refs( define_rule_discriminant!( DepInitialDistinct, apply_dep_initial_distinct, - (RawDepJoin(JoinType::Cross), left, right) + (RawDepJoin(SubqueryType::Scalar), left, right) ); /// Initial rule to generate a join above this dependent join, and push the dependent @@ -81,8 +79,9 @@ fn apply_dep_initial_distinct( .map(|x| ExternColumnRefPred::from_pred_node(x).unwrap().index()) .collect::>(); - // If we have no correlated columns, just emit a cross join instead - if correlated_col_indices.is_empty() { + // If we have no correlated columns, for a scalar subquery, we can emit a cross join + // TODO: Uncorrelated for ANY/EXISTS + if correlated_col_indices.is_empty() && matches!(join.sq_type(), SubqueryType::Scalar) { let new_join = LogicalJoin::new_unchecked( left, right, @@ -106,13 +105,8 @@ fn apply_dep_initial_distinct( ), ); - let new_dep_join = DependentJoin::new_unchecked( - distinct_agg_node.into_plan_node(), - right, - cond, - extern_cols, - join.join_type(), - ); + let new_dep_join = + DependentJoin::new_unchecked(distinct_agg_node.into_plan_node(), right, cond, extern_cols); // Our join condition is going to make sure that all of the correlated columns // in the right side are equal to their equivalent columns in the left side. @@ -122,34 +116,58 @@ fn apply_dep_initial_distinct( // // This is because the aggregate we install on the right side will map the // correlated columns to their respective indices as shown. - let join_cond = LogOpPred::new( - LogOpType::And, - correlated_col_indices - .iter() - .enumerate() - .map(|(i, x)| { - assert!(i + left_schema_size < left_schema_size + right_schema_size); - BinOpPred::new( - ColumnRefPred::new(*x).into_pred_node(), - ColumnRefPred::new(i + left_schema_size).into_pred_node(), - BinOpType::Eq, - ) - .into_pred_node() - }) - .collect(), - ); + assert!(correlated_col_indices.len() > 0); + let join_cond = match join.sq_type() { + SubqueryType::Scalar | SubqueryType::Exists => LogOpPred::new( + LogOpType::And, + correlated_col_indices + .iter() + .enumerate() + .map(|(i, x)| { + assert!(i + left_schema_size < left_schema_size + right_schema_size); + BinOpPred::new( + ColumnRefPred::new(*x).into_pred_node(), + ColumnRefPred::new(i + left_schema_size).into_pred_node(), + BinOpType::Eq, + ) + .into_pred_node() + }) + .collect(), + ), + SubqueryType::Any { pred, op } => LogOpPred::new( + LogOpType::And, + correlated_col_indices + .iter() + .enumerate() + .map(|(i, x)| { + assert!(i + left_schema_size < left_schema_size + right_schema_size); + BinOpPred::new( + pred.clone().into(), + ColumnRefPred::new(i + left_schema_size).into_pred_node(), + *op, + ) + .into_pred_node() + }) + .collect(), + ), + }; + + let join_type = match join.sq_type() { + SubqueryType::Scalar => JoinType::Inner, + SubqueryType::Exists | SubqueryType::Any { pred: _, op: _ } => JoinType::LeftMark, + }; let new_join = LogicalJoin::new_unchecked( left, new_dep_join.into_plan_node(), join_cond.into_pred_node(), - join.join_type(), + join_type, ); // Ensure that the schema above the new_join is the same as it was before // for correctness (Project the left side of the new join, // plus the *right side of the right side*) - let node = if matches!(join.join_type(), JoinType::Inner) { + let node = if matches!(join.sq_type(), SubqueryType::Scalar) { LogicalProjection::new( new_join.into_plan_node(), ListPred::new( @@ -170,10 +188,10 @@ fn apply_dep_initial_distinct( vec![node.into()] } -define_rule_discriminant!( +define_rule!( DepJoinPastProj, apply_dep_join_past_proj, - (DepJoin(JoinType::Cross), left, (Projection, right)) + (DepJoin, left, (Projection, right)) ); /// Pushes a dependent join past a projection node. @@ -210,17 +228,16 @@ fn apply_dep_join_past_proj( .collect(), ); - let new_dep_join = - DependentJoin::new_unchecked(left, right, cond, extern_cols, join.join_type()); + let new_dep_join = DependentJoin::new_unchecked(left, right, cond, extern_cols); let new_proj = LogicalProjection::new(new_dep_join.into_plan_node(), new_proj_exprs); vec![new_proj.into_plan_node().into()] } -define_rule_discriminant!( +define_rule!( DepJoinPastFilter, apply_dep_join_past_filter, - (DepJoin(JoinType::Cross), left, (Filter, right)) + (DepJoin, left, (Filter, right)) ); /// Pushes a dependent join past a projection node. @@ -273,7 +290,6 @@ fn apply_dep_join_past_filter( .map(|x| ExternColumnRefPred::new(x).into_pred_node()) .collect(), ), - join.join_type(), ); let new_filter = LogicalFilter::new(new_dep_join.into_plan_node(), rewritten_expr); @@ -281,10 +297,10 @@ fn apply_dep_join_past_filter( vec![new_filter.into_plan_node().into()] } -define_rule_discriminant!( +define_rule!( DepJoinPastAgg, apply_dep_join_past_agg, - (DepJoin(JoinType::Cross), left, (Agg, right)) + (DepJoin, left, (Agg, right)) ); /// Pushes a dependent join past an aggregation node @@ -348,8 +364,7 @@ fn apply_dep_join_past_agg( .collect(), ); - let new_dep_join = - DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols, join.join_type()); + let new_dep_join = DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols); let new_agg_exprs_size = new_exprs.len(); let new_agg_groups_size = new_groups.len(); @@ -438,10 +453,10 @@ fn apply_dep_join_past_agg( // Heuristics-only rule. If we don't have references to the external columns on the right side, // we can rewrite the dependent join into a normal join. -define_rule_discriminant!( +define_rule!( DepJoinEliminate, apply_dep_join_eliminate_at_scan, // TODO matching is all wrong - (DepJoin(JoinType::Cross), left, right) + (DepJoin, left, right) ); /// If we've gone all the way down to the scan node, we can swap the dependent join diff --git a/optd-sqllogictest/slt/unnest-in-uncor.slt b/optd-sqllogictest/slt/unnest-in-uncor.slt new file mode 100644 index 00000000..ad4841e5 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-in-uncor.slt @@ -0,0 +1,13 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_totalprice > 250000 +) order by c.c_custkey; +---- +10 Customer#000000010 +70 Customer#000000070 diff --git a/optd-sqllogictest/slt/unnest-in.slt b/optd-sqllogictest/slt/unnest-in.slt index ad4841e5..dfd3cc7b 100644 --- a/optd-sqllogictest/slt/unnest-in.slt +++ b/optd-sqllogictest/slt/unnest-in.slt @@ -6,8 +6,9 @@ FROM customer c WHERE c.c_custkey IN ( SELECT o.o_custkey FROM orders o - WHERE o.o_totalprice > 250000 -) order by c.c_custkey; + WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 +) +ORDER BY c.c_custkey; ---- 10 Customer#000000010 70 Customer#000000070 diff --git a/optd-sqllogictest/slt/unnest-not-in-uncor.slt b/optd-sqllogictest/slt/unnest-not-in-uncor.slt new file mode 100644 index 00000000..aefb5f90 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-not-in-uncor.slt @@ -0,0 +1,13 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey NOT IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_orderstatus = 'O' +) order by c.c_custkey; +---- +10 Customer#000000010 +70 Customer#000000070 From f9aaccb55b172bf1343819dbdb2a10ffdbdeb0c6 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 20:47:38 -0500 Subject: [PATCH 36/47] Assign TODO to myself --- optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 5d7d610f..0005b895 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -80,7 +80,7 @@ fn apply_dep_initial_distinct( .collect::>(); // If we have no correlated columns, for a scalar subquery, we can emit a cross join - // TODO: Uncorrelated for ANY/EXISTS + // TODO(bowad): Uncorrelated for ANY/EXISTS if correlated_col_indices.is_empty() && matches!(join.sq_type(), SubqueryType::Scalar) { let new_join = LogicalJoin::new_unchecked( left, From 20bd1908e9bec835a378ab17cf9642dd5687acb9 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 23:01:53 -0500 Subject: [PATCH 37/47] update in exists to be correlated --- optd-sqllogictest/slt/unnest-in-exists.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optd-sqllogictest/slt/unnest-in-exists.slt b/optd-sqllogictest/slt/unnest-in-exists.slt index 29b24b33..bf9d02a5 100644 --- a/optd-sqllogictest/slt/unnest-in-exists.slt +++ b/optd-sqllogictest/slt/unnest-in-exists.slt @@ -6,7 +6,7 @@ FROM customer c WHERE c.c_custkey IN ( SELECT o.o_custkey FROM orders o - WHERE o.o_totalprice > 250000 + WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 ) AND EXISTS ( SELECT 1 From 41867f380637073603f602f4e42a282a840274b8 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 23:25:58 -0500 Subject: [PATCH 38/47] Uncorrelated IN --- .../src/rules/subquery/depjoin_pushdown.rs | 26 ++++++++++ optd-sqllogictest/slt/unnest-not-in-uncor.slt | 52 ++++++++++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 0005b895..74a1e8cd 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -81,6 +81,32 @@ fn apply_dep_initial_distinct( // If we have no correlated columns, for a scalar subquery, we can emit a cross join // TODO(bowad): Uncorrelated for ANY/EXISTS + if correlated_col_indices.is_empty() { + let res = match join.sq_type() { + SubqueryType::Scalar => LogicalJoin::new_unchecked( + left, + right, + ConstantPred::bool(true).into_pred_node(), + JoinType::Cross, + ) + .into_plan_node(), + SubqueryType::Exists => todo!(), + SubqueryType::Any { pred, op } => LogicalJoin::new_unchecked( + left, + right, + BinOpPred::new( + pred.clone().into(), + ColumnRefPred::new(left_schema_size).into_pred_node(), + *op, + ) + .into_pred_node(), + JoinType::LeftMark, + ) + .into_plan_node(), + }; + + return vec![res.into()]; + } if correlated_col_indices.is_empty() && matches!(join.sq_type(), SubqueryType::Scalar) { let new_join = LogicalJoin::new_unchecked( left, diff --git a/optd-sqllogictest/slt/unnest-not-in-uncor.slt b/optd-sqllogictest/slt/unnest-not-in-uncor.slt index aefb5f90..226c7ebf 100644 --- a/optd-sqllogictest/slt/unnest-not-in-uncor.slt +++ b/optd-sqllogictest/slt/unnest-not-in-uncor.slt @@ -9,5 +9,53 @@ WHERE c.c_custkey NOT IN ( WHERE o.o_orderstatus = 'O' ) order by c.c_custkey; ---- -10 Customer#000000010 -70 Customer#000000070 +3 Customer#000000003 +6 Customer#000000006 +9 Customer#000000009 +12 Customer#000000012 +15 Customer#000000015 +18 Customer#000000018 +21 Customer#000000021 +24 Customer#000000024 +27 Customer#000000027 +30 Customer#000000030 +33 Customer#000000033 +36 Customer#000000036 +39 Customer#000000039 +42 Customer#000000042 +45 Customer#000000045 +48 Customer#000000048 +51 Customer#000000051 +54 Customer#000000054 +57 Customer#000000057 +60 Customer#000000060 +63 Customer#000000063 +66 Customer#000000066 +69 Customer#000000069 +72 Customer#000000072 +75 Customer#000000075 +78 Customer#000000078 +81 Customer#000000081 +84 Customer#000000084 +87 Customer#000000087 +90 Customer#000000090 +93 Customer#000000093 +96 Customer#000000096 +99 Customer#000000099 +102 Customer#000000102 +105 Customer#000000105 +108 Customer#000000108 +111 Customer#000000111 +114 Customer#000000114 +117 Customer#000000117 +120 Customer#000000120 +123 Customer#000000123 +126 Customer#000000126 +129 Customer#000000129 +132 Customer#000000132 +135 Customer#000000135 +138 Customer#000000138 +141 Customer#000000141 +144 Customer#000000144 +147 Customer#000000147 +150 Customer#000000150 From 3a93414ca4aa31e763c8412d81fb3569f4bb0fc2 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 23:27:46 -0500 Subject: [PATCH 39/47] Q16 working --- optd-sqllogictest/slt/tpch-q16.slt | 68 +++++++++++++++++++++ optd-sqllogictest/slt/tpch-q16.slt.disabled | 68 --------------------- 2 files changed, 68 insertions(+), 68 deletions(-) create mode 100644 optd-sqllogictest/slt/tpch-q16.slt delete mode 100644 optd-sqllogictest/slt/tpch-q16.slt.disabled diff --git a/optd-sqllogictest/slt/tpch-q16.slt b/optd-sqllogictest/slt/tpch-q16.slt new file mode 100644 index 00000000..b2c6a9c4 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q16.slt @@ -0,0 +1,68 @@ +include _tpch_tables.slt.part + +query +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; +---- +Brand#11 PROMO ANODIZED TIN 45 4 +Brand#11 SMALL PLATED COPPER 45 4 +Brand#11 STANDARD POLISHED TIN 45 4 +Brand#13 MEDIUM ANODIZED STEEL 36 4 +Brand#13 SMALL BRUSHED NICKEL 19 4 +Brand#14 SMALL ANODIZED NICKEL 45 4 +Brand#15 LARGE ANODIZED BRASS 45 4 +Brand#21 LARGE BURNISHED COPPER 19 4 +Brand#23 ECONOMY BRUSHED COPPER 9 4 +Brand#24 MEDIUM PLATED STEEL 19 4 +Brand#25 MEDIUM PLATED BRASS 45 4 +Brand#25 SMALL BURNISHED COPPER 3 4 +Brand#31 ECONOMY PLATED STEEL 23 4 +Brand#31 PROMO POLISHED TIN 23 4 +Brand#32 MEDIUM BURNISHED BRASS 49 4 +Brand#33 LARGE BRUSHED TIN 36 4 +Brand#33 SMALL BURNISHED NICKEL 3 4 +Brand#34 LARGE PLATED BRASS 45 4 +Brand#34 MEDIUM BRUSHED COPPER 9 4 +Brand#34 SMALL PLATED BRASS 14 4 +Brand#35 STANDARD ANODIZED STEEL 23 4 +Brand#43 MEDIUM ANODIZED BRASS 14 4 +Brand#43 PROMO POLISHED BRASS 19 4 +Brand#43 SMALL BRUSHED NICKEL 9 4 +Brand#44 SMALL PLATED COPPER 19 4 +Brand#51 ECONOMY POLISHED STEEL 49 4 +Brand#52 MEDIUM BURNISHED TIN 45 4 +Brand#52 SMALL BURNISHED NICKEL 14 4 +Brand#53 LARGE BURNISHED NICKEL 23 4 +Brand#53 MEDIUM BRUSHED COPPER 3 4 +Brand#53 STANDARD PLATED STEEL 45 4 +Brand#54 ECONOMY ANODIZED BRASS 9 4 +Brand#55 STANDARD ANODIZED BRASS 36 4 +Brand#55 STANDARD BRUSHED COPPER 3 4 diff --git a/optd-sqllogictest/slt/tpch-q16.slt.disabled b/optd-sqllogictest/slt/tpch-q16.slt.disabled deleted file mode 100644 index f480ca1b..00000000 --- a/optd-sqllogictest/slt/tpch-q16.slt.disabled +++ /dev/null @@ -1,68 +0,0 @@ -include _tpch_tables.slt.part - -query -select - p_brand, - p_type, - p_size, - count(distinct ps_suppkey) as supplier_cnt -from - partsupp, - part -where - p_partkey = ps_partkey - and p_brand <> 'Brand#45' - and p_type not like 'MEDIUM POLISHED%' - and p_size in (49, 14, 23, 45, 19, 3, 36, 9) - and ps_suppkey not in ( - select - s_suppkey - from - supplier - where - s_comment like '%Customer%Complaints%' - ) -group by - p_brand, - p_type, - p_size -order by - supplier_cnt desc, - p_brand, - p_type, - p_size; ----- -Brand#11 PROMO ANODIZED TIN 45 4 -Brand#11 SMALL PLATED COPPER 45 4 -Brand#11 STANDARD POLISHED TIN 45 4 -Brand#13 MEDIUM ANODIZED STEEL 36 4 -Brand#14 SMALL ANODIZED NICKEL 45 4 -Brand#15 LARGE ANODIZED BRASS 45 4 -Brand#21 LARGE BURNISHED COPPER 19 4 -Brand#23 ECONOMY BRUSHED COPPER 9 4 -Brand#25 MEDIUM PLATED BRASS 45 4 -Brand#31 ECONOMY PLATED STEEL 23 4 -Brand#31 PROMO POLISHED TIN 23 4 -Brand#32 MEDIUM BURNISHED BRASS 49 4 -Brand#33 LARGE BRUSHED TIN 36 4 -Brand#33 SMALL BURNISHED NICKEL 3 4 -Brand#34 LARGE PLATED BRASS 45 4 -Brand#34 MEDIUM BRUSHED COPPER 9 4 -Brand#34 SMALL PLATED BRASS 14 4 -Brand#35 STANDARD ANODIZED STEEL 23 4 -Brand#43 PROMO POLISHED BRASS 19 4 -Brand#43 SMALL BRUSHED NICKEL 9 4 -Brand#44 SMALL PLATED COPPER 19 4 -Brand#52 MEDIUM BURNISHED TIN 45 4 -Brand#52 SMALL BURNISHED NICKEL 14 4 -Brand#53 MEDIUM BRUSHED COPPER 3 4 -Brand#55 STANDARD ANODIZED BRASS 36 4 -Brand#55 STANDARD BRUSHED COPPER 3 4 -Brand#13 SMALL BRUSHED NICKEL 19 2 -Brand#25 SMALL BURNISHED COPPER 3 2 -Brand#43 MEDIUM ANODIZED BRASS 14 2 -Brand#53 STANDARD PLATED STEEL 45 2 -Brand#24 MEDIUM PLATED STEEL 19 1 -Brand#51 ECONOMY POLISHED STEEL 49 1 -Brand#53 LARGE BURNISHED NICKEL 23 1 -Brand#54 ECONOMY ANODIZED BRASS 9 1 From 88768837ccd6e8a3b685f08e6a778d9f84217856 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 23:52:03 -0500 Subject: [PATCH 40/47] Unnesting of correlated EXISTS clauses --- .../src/rules/subquery/depjoin_pushdown.rs | 41 ++++++++++++++++++- optd-sqllogictest/slt/unnest-exists-uncor.slt | 17 ++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 optd-sqllogictest/slt/unnest-exists-uncor.slt diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 74a1e8cd..15e55cc1 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -3,6 +3,7 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. +use datafusion_expr::Aggregate; use optd_core::nodes::{PlanNodeOrGroup, PredNode}; use optd_core::optimizer::Optimizer; use optd_core::rules::{Rule, RuleMatcher}; @@ -11,7 +12,7 @@ use crate::plan_nodes::{ ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, DependentJoin, DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, FuncPred, FuncType, JoinType, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, - LogicalProjection, PredExt, RawDependentJoin, SubqueryType, + LogicalLimit, LogicalProjection, PredExt, RawDependentJoin, SubqueryType, }; use crate::rules::macros::{define_rule, define_rule_discriminant}; use crate::OptimizerExt; @@ -90,7 +91,43 @@ fn apply_dep_initial_distinct( JoinType::Cross, ) .into_plan_node(), - SubqueryType::Exists => todo!(), + SubqueryType::Exists => { + let right_lim_1 = LogicalLimit::new_unchecked( + right, + ConstantPred::int64(0).into_pred_node(), + ConstantPred::int64(1).into_pred_node(), + ) + .into_plan_node(); + let right_count_star = LogicalAgg::new( + right_lim_1.into(), + ListPred::new(vec![FuncPred::new( + FuncType::Agg("count".to_string()), + ListPred::new(vec![ConstantPred::int64(1).into_pred_node()]), + ) + .into_pred_node()]), + ListPred::new(vec![]), + ) + .into_plan_node(); + + let count_star_to_bool_proj = LogicalProjection::new( + right_count_star, + ListPred::new(vec![BinOpPred::new( + ColumnRefPred::new(0).into_pred_node(), + ConstantPred::int64(0).into_pred_node(), + BinOpType::Gt, + ) + .into_pred_node()]), + ) + .into_plan_node(); + + LogicalJoin::new_unchecked( + left, + count_star_to_bool_proj, + ConstantPred::bool(true).into_pred_node(), + JoinType::Cross, + ) + .into_plan_node() + } SubqueryType::Any { pred, op } => LogicalJoin::new_unchecked( left, right, diff --git a/optd-sqllogictest/slt/unnest-exists-uncor.slt b/optd-sqllogictest/slt/unnest-exists-uncor.slt new file mode 100644 index 00000000..932d5e5c --- /dev/null +++ b/optd-sqllogictest/slt/unnest-exists-uncor.slt @@ -0,0 +1,17 @@ +include _tpch_tables.slt.part + +query +SELECT c_name +FROM customer c +WHERE c_nationkey IN ( + SELECT n_nationkey + FROM nation + WHERE n_name = 'GERMANY' +); +---- +Customer#000000062 +Customer#000000071 +Customer#000000093 +Customer#000000119 +Customer#000000129 +Customer#000000136 From 83efff67034fa785f8250ce20fe5f8ba375aa2bb Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Sun, 8 Dec 2024 23:58:42 -0500 Subject: [PATCH 41/47] Update planner tests --- .../tests/subqueries/subquery_unnesting.planner.sql | 8 ++++---- optd-sqlplannertest/tests/tpch/q11.planner.sql | 2 +- optd-sqlplannertest/tests/tpch/q15.planner.sql | 2 +- optd-sqlplannertest/tests/tpch/q17.planner.sql | 2 +- optd-sqlplannertest/tests/tpch/q2.planner.sql | 6 +----- 5 files changed, 8 insertions(+), 12 deletions(-) diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index 2daa9e1a..93f180ce 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -16,7 +16,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -102,7 +102,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -220,7 +220,7 @@ select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1; /* LogicalProjection { exprs: [ #0, #2 ] } -└── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } +└── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -296,7 +296,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg diff --git a/optd-sqlplannertest/tests/tpch/q11.planner.sql b/optd-sqlplannertest/tests/tpch/q11.planner.sql index 6b5d0af9..3e31bceb 100644 --- a/optd-sqlplannertest/tests/tpch/q11.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q11.planner.sql @@ -36,7 +36,7 @@ LogicalSort ├── cond:Gt │ ├── Cast { cast_to: Decimal128(38, 15), child: #1 } │ └── #2 - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [] } ├── LogicalAgg │ ├── exprs:Agg(Sum) │ │ └── Mul diff --git a/optd-sqlplannertest/tests/tpch/q15.planner.sql b/optd-sqlplannertest/tests/tpch/q15.planner.sql index 4273cf92..d66bc87d 100644 --- a/optd-sqlplannertest/tests/tpch/q15.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q15.planner.sql @@ -46,7 +46,7 @@ LogicalSort │ └── Eq │ ├── #8 │ └── #9 - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalScan { table: supplier } │ └── LogicalProjection { exprs: [ #0, #1 ] } diff --git a/optd-sqlplannertest/tests/tpch/q17.planner.sql b/optd-sqlplannertest/tests/tpch/q17.planner.sql index ab174a81..c2aa8009 100644 --- a/optd-sqlplannertest/tests/tpch/q17.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q17.planner.sql @@ -43,7 +43,7 @@ LogicalProjection │ └── Lt │ ├── Cast { cast_to: Decimal128(30, 15), child: #4 } │ └── #25 - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#16) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#16) ] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalScan { table: lineitem } │ └── LogicalScan { table: part } diff --git a/optd-sqlplannertest/tests/tpch/q2.planner.sql b/optd-sqlplannertest/tests/tpch/q2.planner.sql index 6111bcc9..90fb0d2c 100644 --- a/optd-sqlplannertest/tests/tpch/q2.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q2.planner.sql @@ -81,7 +81,7 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } │ └── Eq │ ├── #19 │ └── #28 - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalJoin { join_type: Cross, cond: true } │ │ ├── LogicalJoin { join_type: Cross, cond: true } @@ -228,11 +228,7 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } │ │ └── LogicalScan { table: supplier } │ └── LogicalScan { table: nation } └── LogicalScan { table: region } -<<<<<<< HEAD -PhysicalLimit { skip: 0(u64), fetch: 100(u64) } -======= PhysicalLimit { skip: 0(i64), fetch: 100(i64) } ->>>>>>> main └── PhysicalSort ├── exprs: │ ┌── SortOrder { order: Desc } From 9b9152cb1acc0f7d98e5221db434a9e6b9ad6523 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Mon, 9 Dec 2024 00:02:15 -0500 Subject: [PATCH 42/47] Cleanup --- optd-datafusion-bridge/src/into_optd.rs | 1 - optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 155de062..0b8537a1 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -309,7 +309,6 @@ impl OptdPlanContext<'_> { Ok(ColumnRefPred::new(new_column_ref_idx).into_pred_node()) } Expr::Exists(ex) => { - // We could use mark join here, if we had one... let sq = &ex.subquery; let negated = ex.negated; diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 15e55cc1..bfcda284 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -80,8 +80,7 @@ fn apply_dep_initial_distinct( .map(|x| ExternColumnRefPred::from_pred_node(x).unwrap().index()) .collect::>(); - // If we have no correlated columns, for a scalar subquery, we can emit a cross join - // TODO(bowad): Uncorrelated for ANY/EXISTS + // If we have no correlated columns, we can skip the whole dependent join step if correlated_col_indices.is_empty() { let res = match join.sq_type() { SubqueryType::Scalar => LogicalJoin::new_unchecked( From 3f03aca2ed6d1b2dab109279dae4078e465106a9 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Mon, 9 Dec 2024 00:04:23 -0500 Subject: [PATCH 43/47] Clippy --- optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index bfcda284..b5f3614a 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -3,7 +3,6 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. -use datafusion_expr::Aggregate; use optd_core::nodes::{PlanNodeOrGroup, PredNode}; use optd_core::optimizer::Optimizer; use optd_core::rules::{Rule, RuleMatcher}; @@ -98,7 +97,7 @@ fn apply_dep_initial_distinct( ) .into_plan_node(); let right_count_star = LogicalAgg::new( - right_lim_1.into(), + right_lim_1, ListPred::new(vec![FuncPred::new( FuncType::Agg("count".to_string()), ListPred::new(vec![ConstantPred::int64(1).into_pred_node()]), @@ -178,7 +177,7 @@ fn apply_dep_initial_distinct( // // This is because the aggregate we install on the right side will map the // correlated columns to their respective indices as shown. - assert!(correlated_col_indices.len() > 0); + assert!(!correlated_col_indices.is_empty()); let join_cond = match join.sq_type() { SubqueryType::Scalar | SubqueryType::Exists => LogOpPred::new( LogOpType::And, From beec1ea7574415b1a2d4ec7ed28e227651a7323a Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Mon, 9 Dec 2024 00:04:53 -0500 Subject: [PATCH 44/47] Change to dbg assert --- optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index b5f3614a..92f2ef15 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -177,7 +177,7 @@ fn apply_dep_initial_distinct( // // This is because the aggregate we install on the right side will map the // correlated columns to their respective indices as shown. - assert!(!correlated_col_indices.is_empty()); + debug_assert!(!correlated_col_indices.is_empty()); let join_cond = match join.sq_type() { SubqueryType::Scalar | SubqueryType::Exists => LogOpPred::new( LogOpType::And, From fa7e41e14bac83a6df838c070b9cfe87380d2bb9 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Mon, 9 Dec 2024 00:08:01 -0500 Subject: [PATCH 45/47] One more clippy warning... --- optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 92f2ef15..df081ca0 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -200,7 +200,7 @@ fn apply_dep_initial_distinct( correlated_col_indices .iter() .enumerate() - .map(|(i, x)| { + .map(|(i, _)| { assert!(i + left_schema_size < left_schema_size + right_schema_size); BinOpPred::new( pred.clone().into(), From 34e70f058f11eb6f836678cedc5b584f9ddb91ed Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Mon, 9 Dec 2024 00:17:26 -0500 Subject: [PATCH 46/47] Fix assertion bug for Q20 and Q22 --- optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs | 5 +++-- .../slt/{tpch-q20.slt.disabled => tpch-q20.slt} | 0 .../slt/{tpch-q22.slt.disabled => tpch-q22.slt} | 0 3 files changed, 3 insertions(+), 2 deletions(-) rename optd-sqllogictest/slt/{tpch-q20.slt.disabled => tpch-q20.slt} (100%) rename optd-sqllogictest/slt/{tpch-q22.slt.disabled => tpch-q22.slt} (100%) diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index df081ca0..67a7164f 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -166,6 +166,7 @@ fn apply_dep_initial_distinct( ), ); + let new_dep_join_schema_size = correlated_col_indices.len() + right_schema_size; let new_dep_join = DependentJoin::new_unchecked(distinct_agg_node.into_plan_node(), right, cond, extern_cols); @@ -185,7 +186,7 @@ fn apply_dep_initial_distinct( .iter() .enumerate() .map(|(i, x)| { - assert!(i + left_schema_size < left_schema_size + right_schema_size); + assert!(i + left_schema_size < left_schema_size + new_dep_join_schema_size); BinOpPred::new( ColumnRefPred::new(*x).into_pred_node(), ColumnRefPred::new(i + left_schema_size).into_pred_node(), @@ -201,7 +202,7 @@ fn apply_dep_initial_distinct( .iter() .enumerate() .map(|(i, _)| { - assert!(i + left_schema_size < left_schema_size + right_schema_size); + assert!(i + left_schema_size < left_schema_size + new_dep_join_schema_size); BinOpPred::new( pred.clone().into(), ColumnRefPred::new(i + left_schema_size).into_pred_node(), diff --git a/optd-sqllogictest/slt/tpch-q20.slt.disabled b/optd-sqllogictest/slt/tpch-q20.slt similarity index 100% rename from optd-sqllogictest/slt/tpch-q20.slt.disabled rename to optd-sqllogictest/slt/tpch-q20.slt diff --git a/optd-sqllogictest/slt/tpch-q22.slt.disabled b/optd-sqllogictest/slt/tpch-q22.slt similarity index 100% rename from optd-sqllogictest/slt/tpch-q22.slt.disabled rename to optd-sqllogictest/slt/tpch-q22.slt From b741c83caf4282c1248e02f7ad44cd48b4b57af9 Mon Sep 17 00:00:00 2001 From: Benjamin O Date: Tue, 10 Dec 2024 17:09:27 -0500 Subject: [PATCH 47/47] Add new queries to sqlplannertest + delete extraneous file --- .../tests/tpch/q16.planner.sql | 106 +++ optd-sqlplannertest/tests/tpch/q16.yml | 35 + .../tests/tpch/q20.planner.sql | 194 ++++++ optd-sqlplannertest/tests/tpch/q20.yml | 42 ++ .../tests/tpch/q22.planner.sql | 170 +++++ optd-sqlplannertest/tests/tpch/q22.yml | 42 ++ optd-sqlplannertest/tests/tpch/q4.planner.sql | 91 +++ optd-sqlplannertest/tests/tpch/q4.yml | 26 + .../tests/tpch/tpch-01-05.planner.sql | 628 ------------------ 9 files changed, 706 insertions(+), 628 deletions(-) create mode 100644 optd-sqlplannertest/tests/tpch/q16.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q16.yml create mode 100644 optd-sqlplannertest/tests/tpch/q20.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q20.yml create mode 100644 optd-sqlplannertest/tests/tpch/q22.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q22.yml create mode 100644 optd-sqlplannertest/tests/tpch/q4.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q4.yml delete mode 100644 optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql diff --git a/optd-sqlplannertest/tests/tpch/q16.planner.sql b/optd-sqlplannertest/tests/tpch/q16.planner.sql new file mode 100644 index 00000000..b1adf6f6 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q16.planner.sql @@ -0,0 +1,106 @@ +-- TPC-H Q16 +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; + +/* +LogicalSort +├── exprs: +│ ┌── SortOrder { order: Desc } +│ │ └── #3 +│ ├── SortOrder { order: Asc } +│ │ └── #0 +│ ├── SortOrder { order: Asc } +│ │ └── #1 +│ └── SortOrder { order: Asc } +│ └── #2 +└── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } + └── LogicalAgg + ├── exprs:Agg(Count) + │ └── [ #1 ] + ├── groups: [ #8, #9, #10 ] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #5 + │ │ └── #0 + │ ├── Neq + │ │ ├── #8 + │ │ └── "Brand#45" + │ ├── Like { expr: #9, pattern: "MEDIUM POLISHED%", negated: true, case_insensitive: false } + │ ├── InList { expr: Cast { cast_to: Int64, child: #10 }, list: [ 49(i64), 14(i64), 23(i64), 45(i64), 19(i64), 3(i64), 36(i64), 9(i64) ], negated: false } + │ └── Not + │ └── [ #14 ] + └── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(1)) }, op: Eq }, cond: true, extern_cols: [] } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalScan { table: partsupp } + │ └── LogicalScan { table: part } + └── LogicalProjection { exprs: [ #0 ] } + └── LogicalFilter { cond: Like { expr: #6, pattern: "%Customer%Complaints%", negated: false, case_insensitive: false } } + └── LogicalScan { table: supplier } +PhysicalSort +├── exprs: +│ ┌── SortOrder { order: Desc } +│ │ └── #3 +│ ├── SortOrder { order: Asc } +│ │ └── #0 +│ ├── SortOrder { order: Asc } +│ │ └── #1 +│ └── SortOrder { order: Asc } +│ └── #2 +└── PhysicalAgg + ├── aggrs:Agg(Count) + │ └── [ #1 ] + ├── groups: [ #8, #9, #10 ] + └── PhysicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #5 + │ │ └── #0 + │ ├── Neq + │ │ ├── #8 + │ │ └── "Brand#45" + │ ├── Like { expr: #9, pattern: "MEDIUM POLISHED%", negated: true, case_insensitive: false } + │ ├── InList { expr: Cast { cast_to: Int64, child: #10 }, list: [ 49(i64), 14(i64), 23(i64), 45(i64), 19(i64), 3(i64), 36(i64), 9(i64) ], negated: false } + │ └── Not + │ └── [ #14 ] + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:Eq + │ ├── #1 + │ └── #14 + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: partsupp } + │ └── PhysicalScan { table: part } + └── PhysicalProjection { exprs: [ #0 ] } + └── PhysicalFilter { cond: Like { expr: #6, pattern: "%Customer%Complaints%", negated: false, case_insensitive: false } } + └── PhysicalScan { table: supplier } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q16.yml b/optd-sqlplannertest/tests/tpch/q16.yml new file mode 100644 index 00000000..dfcb58ae --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q16.yml @@ -0,0 +1,35 @@ +- sql: | + select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt + from + partsupp, + part + where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) + group by + p_brand, + p_type, + p_size + order by + supplier_cnt desc, + p_brand, + p_type, + p_size; + desc: TPC-H Q16 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd diff --git a/optd-sqlplannertest/tests/tpch/q20.planner.sql b/optd-sqlplannertest/tests/tpch/q20.planner.sql new file mode 100644 index 00000000..e5e6065d --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q20.planner.sql @@ -0,0 +1,194 @@ +-- TPC-H Q20 +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'indian%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'IRAQ' +order by + s_name; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #1, #2 ] } + └── LogicalFilter + ├── cond:And + │ ├── #11 + │ ├── Eq + │ │ ├── #3 + │ │ └── #7 + │ └── Eq + │ ├── #8 + │ └── "IRAQ" + └── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(0)) }, op: Eq }, cond: true, extern_cols: [] } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalScan { table: supplier } + │ └── LogicalScan { table: nation } + └── LogicalProjection { exprs: [ #1 ] } + └── LogicalFilter + ├── cond:And + │ ├── #5 + │ └── Gt + │ ├── Cast { cast_to: Float64, child: #2 } + │ └── #6 + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0), Extern(#1) ] } + ├── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(0)) }, op: Eq }, cond: true, extern_cols: [] } + │ ├── LogicalScan { table: partsupp } + │ └── LogicalProjection { exprs: [ #0 ] } + │ └── LogicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ └── LogicalScan { table: part } + └── LogicalProjection + ├── exprs:Mul + │ ├── 0.5(float) + │ └── Cast { cast_to: Float64, child: #0 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ #4 ] + ├── groups: [] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #1 + │ │ └── Extern(#0) + │ ├── Eq + │ │ ├── #2 + │ │ └── Extern(#1) + │ ├── Geq + │ │ ├── #10 + │ │ └── Cast { cast_to: Date32, child: "1996-01-01" } + │ └── Lt + │ ├── #10 + │ └── Add + │ ├── Cast { cast_to: Date32, child: "1996-01-01" } + │ └── INTERVAL_MONTH_DAY_NANO (12, 0, 0) + └── LogicalScan { table: lineitem } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalProjection { exprs: [ #1, #2 ] } + └── PhysicalFilter + ├── cond:And + │ ├── #11 + │ ├── Eq + │ │ ├── #3 + │ │ └── #7 + │ └── Eq + │ ├── #8 + │ └── "IRAQ" + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:Eq + │ ├── #0 + │ └── #11 + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: supplier } + │ └── PhysicalScan { table: nation } + └── PhysicalProjection { exprs: [ #4 ] } + └── PhysicalFilter + ├── cond:And + │ ├── #8 + │ └── Gt + │ ├── Cast { cast_to: Float64, child: #5 } + │ └── #2 + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #1 ], right_keys: [ #0, #1 ] } + ├── PhysicalProjection + │ ├── exprs: + │ │ ┌── #0 + │ │ ├── #1 + │ │ └── Mul + │ │ ├── 0.5(float) + │ │ └── Cast { cast_to: Float64, child: #2 } + │ └── PhysicalProjection { exprs: [ #0, #1, #4 ] } + │ └── PhysicalNestedLoopJoin + │ ├── join_type: LeftOuter + │ ├── cond:And + │ │ ├── Eq + │ │ │ ├── #0 + │ │ │ └── #2 + │ │ └── Eq + │ │ ├── #1 + │ │ └── #3 + │ ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } + │ │ └── PhysicalNestedLoopJoin + │ │ ├── join_type: LeftMark + │ │ ├── cond:Eq + │ │ │ ├── #0 + │ │ │ └── #5 + │ │ ├── PhysicalScan { table: partsupp } + │ │ └── PhysicalProjection { exprs: [ #0 ] } + │ │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ │ └── PhysicalScan { table: part } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── [ #6 ] + │ ├── groups: [ #0, #1 ] + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Eq + │ │ │ ├── #3 + │ │ │ └── #0 + │ │ ├── Eq + │ │ │ ├── #4 + │ │ │ └── #1 + │ │ ├── Geq + │ │ │ ├── #12 + │ │ │ └── Cast { cast_to: Date32, child: "1996-01-01" } + │ │ └── Lt + │ │ ├── #12 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, child: "1996-01-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (12, 0, 0) + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } + │ │ └── PhysicalNestedLoopJoin + │ │ ├── join_type: LeftMark + │ │ ├── cond:Eq + │ │ │ ├── #0 + │ │ │ └── #5 + │ │ ├── PhysicalScan { table: partsupp } + │ │ └── PhysicalProjection { exprs: [ #0 ] } + │ │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ │ └── PhysicalScan { table: part } + │ └── PhysicalScan { table: lineitem } + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:Eq + │ ├── #0 + │ └── #5 + ├── PhysicalScan { table: partsupp } + └── PhysicalProjection { exprs: [ #0 ] } + └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + └── PhysicalScan { table: part } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q20.yml b/optd-sqlplannertest/tests/tpch/q20.yml new file mode 100644 index 00000000..e94ebb5e --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q20.yml @@ -0,0 +1,42 @@ +- sql: | + select + s_name, + s_address + from + supplier, + nation + where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'indian%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'IRAQ' + order by + s_name; + desc: TPC-H Q20 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd diff --git a/optd-sqlplannertest/tests/tpch/q22.planner.sql b/optd-sqlplannertest/tests/tpch/q22.planner.sql new file mode 100644 index 00000000..89f93534 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q22.planner.sql @@ -0,0 +1,170 @@ +-- TPC-H Q22 +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalAgg + ├── exprs: + │ ┌── Agg(Count) + │ │ └── [ 1(i64) ] + │ └── Agg(Sum) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── LogicalProjection + ├── exprs: + │ ┌── Scalar(Substr) + │ │ └── [ #4, 1(i64), 2(i64) ] + │ └── #5 + └── LogicalFilter + ├── cond:And + │ ├── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ ├── Gt + │ │ ├── Cast { cast_to: Decimal128(19, 6), child: #5 } + │ │ └── #8 + │ └── Not + │ └── [ #9 ] + └── RawDependentJoin { sq_type: Exists, cond: true, extern_cols: [ Extern(#0) ] } + ├── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [] } + │ ├── LogicalScan { table: customer } + │ └── LogicalProjection { exprs: [ #0 ] } + │ └── LogicalAgg + │ ├── exprs:Agg(Avg) + │ │ └── [ #5 ] + │ ├── groups: [] + │ └── LogicalFilter + │ ├── cond:And + │ │ ├── Gt + │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ └── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ └── LogicalScan { table: customer } + └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── Extern(#0) + └── LogicalScan { table: orders } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalAgg + ├── aggrs: + │ ┌── Agg(Count) + │ │ └── [ 1(i64) ] + │ └── Agg(Sum) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── PhysicalProjection + ├── exprs: + │ ┌── Scalar(Substr) + │ │ └── [ #4, 1(i64), 2(i64) ] + │ └── #5 + └── PhysicalFilter + ├── cond:And + │ ├── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ ├── Gt + │ │ ├── Cast { cast_to: Decimal128(19, 6), child: #5 } + │ │ └── #8 + │ └── Not + │ └── [ #9 ] + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #9 + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Avg) + │ │ └── [ #5 ] + │ ├── groups: [] + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Gt + │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ └── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ └── PhysicalScan { table: customer } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Avg) + │ │ └── [ #5 ] + │ ├── groups: [] + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Gt + │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ └── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ └── PhysicalScan { table: customer } + └── PhysicalScan { table: orders } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q22.yml b/optd-sqlplannertest/tests/tpch/q22.yml new file mode 100644 index 00000000..55678824 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q22.yml @@ -0,0 +1,42 @@ +- sql: | + select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal + from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale + group by + cntrycode + order by + cntrycode; + desc: TPC-H Q22 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd diff --git a/optd-sqlplannertest/tests/tpch/q4.planner.sql b/optd-sqlplannertest/tests/tpch/q4.planner.sql new file mode 100644 index 00000000..99bc78dc --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q4.planner.sql @@ -0,0 +1,91 @@ +-- TPC-H Q4 +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalAgg + ├── exprs:Agg(Count) + │ └── [ 1(i64) ] + ├── groups: [ #5 ] + └── LogicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #4 + │ │ └── Cast { cast_to: Date32, child: "1993-07-01" } + │ ├── Lt + │ │ ├── #4 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, child: "1993-07-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ └── #9 + └── RawDependentJoin { sq_type: Exists, cond: true, extern_cols: [ Extern(#0) ] } + ├── LogicalScan { table: orders } + └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15 ] } + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── Extern(#0) + │ └── Lt + │ ├── #11 + │ └── #12 + └── LogicalScan { table: lineitem } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalAgg + ├── aggrs:Agg(Count) + │ └── [ 1(i64) ] + ├── groups: [ #5 ] + └── PhysicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #4 + │ │ └── Cast { cast_to: Date32, child: "1993-07-01" } + │ ├── Lt + │ │ ├── #4 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, child: "1993-07-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ └── #9 + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #9 + ├── PhysicalScan { table: orders } + └── PhysicalProjection { exprs: [ #16, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + ├── PhysicalFilter + │ ├── cond:Lt + │ │ ├── #11 + │ │ └── #12 + │ └── PhysicalScan { table: lineitem } + └── PhysicalAgg { aggrs: [], groups: [ #0 ] } + └── PhysicalScan { table: orders } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q4.yml b/optd-sqlplannertest/tests/tpch/q4.yml new file mode 100644 index 00000000..a30999b2 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q4.yml @@ -0,0 +1,26 @@ +- sql: | + select + o_orderpriority, + count(*) as order_count + from + orders + where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) + group by + o_orderpriority + order by + o_orderpriority; + desc: TPC-H Q4 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd diff --git a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql b/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql deleted file mode 100644 index f917e394..00000000 --- a/optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql +++ /dev/null @@ -1,628 +0,0 @@ --- TPC-H schema -CREATE TABLE NATION ( - N_NATIONKEY INT NOT NULL, - N_NAME CHAR(25) NOT NULL, - N_REGIONKEY INT NOT NULL, - N_COMMENT VARCHAR(152) -); - -CREATE TABLE REGION ( - R_REGIONKEY INT NOT NULL, - R_NAME CHAR(25) NOT NULL, - R_COMMENT VARCHAR(152) -); - -CREATE TABLE PART ( - P_PARTKEY INT NOT NULL, - P_NAME VARCHAR(55) NOT NULL, - P_MFGR CHAR(25) NOT NULL, - P_BRAND CHAR(10) NOT NULL, - P_TYPE VARCHAR(25) NOT NULL, - P_SIZE INT NOT NULL, - P_CONTAINER CHAR(10) NOT NULL, - P_RETAILPRICE DECIMAL(15,2) NOT NULL, - P_COMMENT VARCHAR(23) NOT NULL -); - -CREATE TABLE SUPPLIER ( - S_SUPPKEY INT NOT NULL, - S_NAME CHAR(25) NOT NULL, - S_ADDRESS VARCHAR(40) NOT NULL, - S_NATIONKEY INT NOT NULL, - S_PHONE CHAR(15) NOT NULL, - S_ACCTBAL DECIMAL(15,2) NOT NULL, - S_COMMENT VARCHAR(101) NOT NULL -); - -CREATE TABLE PARTSUPP ( - PS_PARTKEY INT NOT NULL, - PS_SUPPKEY INT NOT NULL, - PS_AVAILQTY INT NOT NULL, - PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, - PS_COMMENT VARCHAR(199) NOT NULL -); - -CREATE TABLE CUSTOMER ( - C_CUSTKEY INT NOT NULL, - C_NAME VARCHAR(25) NOT NULL, - C_ADDRESS VARCHAR(40) NOT NULL, - C_NATIONKEY INT NOT NULL, - C_PHONE CHAR(15) NOT NULL, - C_ACCTBAL DECIMAL(15,2) NOT NULL, - C_MKTSEGMENT CHAR(10) NOT NULL, - C_COMMENT VARCHAR(117) NOT NULL -); - -CREATE TABLE ORDERS ( - O_ORDERKEY INT NOT NULL, - O_CUSTKEY INT NOT NULL, - O_ORDERSTATUS CHAR(1) NOT NULL, - O_TOTALPRICE DECIMAL(15,2) NOT NULL, - O_ORDERDATE DATE NOT NULL, - O_ORDERPRIORITY CHAR(15) NOT NULL, - O_CLERK CHAR(15) NOT NULL, - O_SHIPPRIORITY INT NOT NULL, - O_COMMENT VARCHAR(79) NOT NULL -); - -CREATE TABLE LINEITEM ( - L_ORDERKEY INT NOT NULL, - L_PARTKEY INT NOT NULL, - L_SUPPKEY INT NOT NULL, - L_LINENUMBER INT NOT NULL, - L_QUANTITY DECIMAL(15,2) NOT NULL, - L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, - L_DISCOUNT DECIMAL(15,2) NOT NULL, - L_TAX DECIMAL(15,2) NOT NULL, - L_RETURNFLAG CHAR(1) NOT NULL, - L_LINESTATUS CHAR(1) NOT NULL, - L_SHIPDATE DATE NOT NULL, - L_COMMITDATE DATE NOT NULL, - L_RECEIPTDATE DATE NOT NULL, - L_SHIPINSTRUCT CHAR(25) NOT NULL, - L_SHIPMODE CHAR(10) NOT NULL, - L_COMMENT VARCHAR(44) NOT NULL -); - -/* - -*/ - --- TPC-H Q1 -SELECT - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order -FROM - lineitem -WHERE - l_shipdate <= date '1998-12-01' - interval '90' day -GROUP BY - l_returnflag, l_linestatus -ORDER BY - l_returnflag, l_linestatus; - -/* -LogicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Asc } -│ └── #1 -└── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9 ] } - └── LogicalAgg - ├── exprs: - │ ┌── Agg(Sum) - │ │ └── [ #4 ] - │ ├── Agg(Sum) - │ │ └── [ #5 ] - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── #5 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ │ └── #6 - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── Mul - │ │ │ ├── #5 - │ │ │ └── Sub - │ │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ │ │ └── #6 - │ │ └── Add - │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ │ └── #7 - │ ├── Agg(Avg) - │ │ └── [ #4 ] - │ ├── Agg(Avg) - │ │ └── [ #5 ] - │ ├── Agg(Avg) - │ │ └── [ #6 ] - │ └── Agg(Count) - │ └── [ 1(u8) ] - ├── groups: [ #8, #9 ] - └── LogicalFilter - ├── cond:Leq - │ ├── #10 - │ └── Sub - │ ├── Cast { cast_to: Date32, child: "1998-12-01" } - │ └── INTERVAL_MONTH_DAY_NANO (0, 90, 0) - └── LogicalScan { table: lineitem } -PhysicalSort -├── exprs: -│ ┌── SortOrder { order: Asc } -│ │ └── #0 -│ └── SortOrder { order: Asc } -│ └── #1 -└── PhysicalAgg - ├── aggrs: - │ ┌── Agg(Sum) - │ │ └── [ #4 ] - │ ├── Agg(Sum) - │ │ └── [ #5 ] - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── #5 - │ │ └── Sub - │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ │ └── #6 - │ ├── Agg(Sum) - │ │ └── Mul - │ │ ├── Mul - │ │ │ ├── #5 - │ │ │ └── Sub - │ │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ │ │ └── #6 - │ │ └── Add - │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ │ └── #7 - │ ├── Agg(Avg) - │ │ └── [ #4 ] - │ ├── Agg(Avg) - │ │ └── [ #5 ] - │ ├── Agg(Avg) - │ │ └── [ #6 ] - │ └── Agg(Count) - │ └── [ 1(u8) ] - ├── groups: [ #8, #9 ] - └── PhysicalFilter - ├── cond:Leq - │ ├── #10 - │ └── Sub - │ ├── Cast { cast_to: Date32, child: "1998-12-01" } - │ └── INTERVAL_MONTH_DAY_NANO (0, 90, 0) - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q2 -select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey -and p_size = 4 -and p_type like '%TIN' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AFRICA' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AFRICA' - ) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey -limit 100; - -/* -LogicalLimit { skip: 0(u64), fetch: 100(u64) } -└── LogicalSort - ├── exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #0 - │ ├── SortOrder { order: Asc } - │ │ └── #2 - │ ├── SortOrder { order: Asc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #3 - └── LogicalProjection { exprs: [ #14, #10, #22, #0, #2, #11, #13, #15 ] } - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #16 - │ ├── Eq - │ │ ├── #9 - │ │ └── #17 - │ ├── Eq - │ │ ├── Cast { cast_to: Int64, child: #5 } - │ │ └── 4(i64) - │ ├── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false } - │ ├── Eq - │ │ ├── #12 - │ │ └── #21 - │ ├── Eq - │ │ ├── #23 - │ │ └── #25 - │ ├── Eq - │ │ ├── #26 - │ │ └── "AFRICA" - │ └── Eq - │ ├── #19 - │ └── #28 - └── RawDependentJoin { join_type: Inner, cond: true, extern_cols: [ Extern(#0) ] } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalScan { table: part } - │ │ │ │ └── LogicalScan { table: supplier } - │ │ │ └── LogicalScan { table: partsupp } - │ │ └── LogicalScan { table: nation } - │ └── LogicalScan { table: region } - └── LogicalProjection { exprs: [ #0 ] } - └── LogicalAgg - ├── exprs:Agg(Min) - │ └── [ #3 ] - ├── groups: [] - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── Extern(#0) - │ │ └── #0 - │ ├── Eq - │ │ ├── #5 - │ │ └── #1 - │ ├── Eq - │ │ ├── #8 - │ │ └── #12 - │ ├── Eq - │ │ ├── #14 - │ │ └── #16 - │ └── Eq - │ ├── #17 - │ └── "AFRICA" - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalScan { table: partsupp } - │ │ └── LogicalScan { table: supplier } - │ └── LogicalScan { table: nation } - └── LogicalScan { table: region } -PhysicalLimit { skip: 0(u64), fetch: 100(u64) } -└── PhysicalSort - ├── exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #0 - │ ├── SortOrder { order: Asc } - │ │ └── #2 - │ ├── SortOrder { order: Asc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #3 - └── PhysicalProjection { exprs: [ #21, #17, #4, #7, #9, #18, #20, #22 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #26, #7 ], right_keys: [ #2, #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #7, #16 ], right_keys: [ #0, #1 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #12 ] } - │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } - │ │ │ ├── PhysicalFilter - │ │ │ │ ├── cond:Eq - │ │ │ │ │ ├── #1 - │ │ │ │ │ └── "AFRICA" - │ │ │ │ └── PhysicalScan { table: region } - │ │ │ └── PhysicalScan { table: nation } - │ │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalFilter - │ │ │ ├── cond:And - │ │ │ │ ├── Eq - │ │ │ │ │ ├── Cast { cast_to: Int64, child: #5 } - │ │ │ │ │ └── 4(i64) - │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false } - │ │ │ └── PhysicalScan { table: part } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: partsupp } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: region } - └── PhysicalAgg - ├── aggrs:Agg(Min) - │ └── [ #4 ] - ├── groups: [ #0 ] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #1 - │ ├── Eq - │ │ ├── #6 - │ │ └── #2 - │ ├── Eq - │ │ ├── #9 - │ │ └── #13 - │ ├── Eq - │ │ ├── #15 - │ │ └── #17 - │ └── Eq - │ ├── #18 - │ └── "AFRICA" - └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: region } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: nation } - └── PhysicalScan { table: region } -*/ - --- TPC-H Q3 -SELECT - l_orderkey, - SUM(l_extendedprice * (1 - l_discount)) AS revenue, - o_orderdate, - o_shippriority -FROM - customer, - orders, - lineitem -WHERE - c_mktsegment = 'FURNITURE' - AND c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND o_orderdate < DATE '1995-03-29' - AND l_shipdate > DATE '1995-03-29' -GROUP BY - l_orderkey, - o_orderdate, - o_shippriority -ORDER BY - revenue DESC, - o_orderdate LIMIT 10; - -/* -LogicalLimit { skip: 0(u64), fetch: 10(u64) } -└── LogicalSort - ├── exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #2 - └── LogicalProjection { exprs: [ #0, #3, #1, #2 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ └── #23 - ├── groups: [ #17, #12, #15 ] - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #6 - │ │ └── "FURNITURE" - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #17 - │ │ └── #8 - │ ├── Lt - │ │ ├── #12 - │ │ └── Cast { cast_to: Date32, child: "1995-03-29" } - │ └── Gt - │ ├── #27 - │ └── Cast { cast_to: Date32, child: "1995-03-29" } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalScan { table: customer } - │ └── LogicalScan { table: orders } - └── LogicalScan { table: lineitem } -PhysicalLimit { skip: 0(u64), fetch: 10(u64) } -└── PhysicalSort - ├── exprs: - │ ┌── SortOrder { order: Desc } - │ │ └── #1 - │ └── SortOrder { order: Asc } - │ └── #2 - └── PhysicalProjection { exprs: [ #0, #3, #1, #2 ] } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ └── #23 - ├── groups: [ #17, #12, #15 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - ├── PhysicalFilter - │ ├── cond:Eq - │ │ ├── #6 - │ │ └── "FURNITURE" - │ └── PhysicalScan { table: customer } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - ├── PhysicalFilter - │ ├── cond:Lt - │ │ ├── #4 - │ │ └── Cast { cast_to: Date32, child: "1995-03-29" } - │ └── PhysicalScan { table: orders } - └── PhysicalFilter - ├── cond:Gt - │ ├── #10 - │ └── Cast { cast_to: Date32, child: "1995-03-29" } - └── PhysicalScan { table: lineitem } -*/ - --- TPC-H Q5 -SELECT - n_name AS nation, - SUM(l_extendedprice * (1 - l_discount)) AS revenue -FROM - customer, - orders, - lineitem, - supplier, - nation, - region -WHERE - c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND l_suppkey = s_suppkey - AND c_nationkey = s_nationkey - AND s_nationkey = n_nationkey - AND n_regionkey = r_regionkey - AND r_name = 'Asia' -- Specified region - AND o_orderdate >= DATE '2023-01-01' - AND o_orderdate < DATE '2024-01-01' -GROUP BY - n_name -ORDER BY - revenue DESC; - -/* -LogicalSort -├── exprs:SortOrder { order: Desc } -│ └── #1 -└── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ └── #23 - ├── groups: [ #41 ] - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #9 - │ ├── Eq - │ │ ├── #17 - │ │ └── #8 - │ ├── Eq - │ │ ├── #19 - │ │ └── #33 - │ ├── Eq - │ │ ├── #3 - │ │ └── #36 - │ ├── Eq - │ │ ├── #36 - │ │ └── #40 - │ ├── Eq - │ │ ├── #42 - │ │ └── #44 - │ ├── Eq - │ │ ├── #45 - │ │ └── "Asia" - │ ├── Geq - │ │ ├── #12 - │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } - │ └── Lt - │ ├── #12 - │ └── Cast { cast_to: Date32, child: "2024-01-01" } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalScan { table: customer } - │ │ │ │ └── LogicalScan { table: orders } - │ │ │ └── LogicalScan { table: lineitem } - │ │ └── LogicalScan { table: supplier } - │ └── LogicalScan { table: nation } - └── LogicalScan { table: region } -PhysicalSort -├── exprs:SortOrder { order: Desc } -│ └── #1 -└── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #22 - │ └── Sub - │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ └── #23 - ├── groups: [ #41 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #42 ], right_keys: [ #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #36 ], right_keys: [ #0 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] } - │ │ ├── PhysicalProjection { exprs: [ #25, #26, #27, #28, #29, #30, #31, #32, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24 ] } - │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ │ │ ├── PhysicalFilter - │ │ │ │ │ ├── cond:And - │ │ │ │ │ │ ├── Geq - │ │ │ │ │ │ │ ├── #4 - │ │ │ │ │ │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } - │ │ │ │ │ │ └── Lt - │ │ │ │ │ │ ├── #4 - │ │ │ │ │ │ └── Cast { cast_to: Date32, child: "2024-01-01" } - │ │ │ │ │ └── PhysicalScan { table: orders } - │ │ │ │ └── PhysicalScan { table: lineitem } - │ │ │ └── PhysicalScan { table: customer } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: nation } - └── PhysicalFilter - ├── cond:Eq - │ ├── #1 - │ └── "Asia" - └── PhysicalScan { table: region } -*/ -