Skip to content

Commit 981643d

Browse files
committed
Allow partitionwise join to handle nested FULL JOIN USING cases.
This case didn't work because columns merged by FULL JOIN USING are represented in the parse tree by COALESCE expressions, and the logic for recognizing a partitionable join failed to match upper-level join clauses to such expressions. To fix, synthesize suitable COALESCE expressions and add them to the nullable_partexprs lists. This is pretty ugly and brute-force, but it gets the job done. (I have ambitions of rethinking the way outer-join output Vars are represented, so maybe that will provide a cleaner solution someday. For now, do this.)

Amit Langote, reviewed by Justin Pryzby, Richard Guo, and myself

Discussion: https://postgr.es/m/CA+HiwqG2WVUGmLJqtR0tPFhniO=H=9qQ+Z3L_ZC+Y3-EVQHFGg@mail.gmail.com
1 parent c8434d6 commit 981643d

File tree

4 files changed

+174
-2
lines changed

4 files changed

+174
-2
lines changed

Diff for: src/backend/optimizer/util/relnode.c

+43-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <limits.h>
1818

1919
#include "miscadmin.h"
20+
#include "nodes/nodeFuncs.h"
2021
#include "optimizer/appendinfo.h"
2122
#include "optimizer/clauses.h"
2223
#include "optimizer/cost.h"
@@ -1875,7 +1876,8 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
18751876
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
18761877
JoinType jointype)
18771878
{
1878-
int partnatts = joinrel->part_scheme->partnatts;
1879+
PartitionScheme part_scheme = joinrel->part_scheme;
1880+
int partnatts = part_scheme->partnatts;
18791881

18801882
joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts);
18811883
joinrel->nullable_partexprs =
@@ -1884,7 +1886,8 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
18841886
/*
18851887
* The joinrel's partition expressions are the same as those of the input
18861888
* rels, but we must properly classify them as nullable or not in the
1887-
* joinrel's output.
1889+
* joinrel's output. (Also, we add some more partition expressions if
1890+
* it's a FULL JOIN.)
18881891
*/
18891892
for (int cnt = 0; cnt < partnatts; cnt++)
18901893
{
@@ -1895,6 +1898,7 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
18951898
const List *inner_null_expr = inner_rel->nullable_partexprs[cnt];
18961899
List *partexpr = NIL;
18971900
List *nullable_partexpr = NIL;
1901+
ListCell *lc;
18981902

18991903
switch (jointype)
19001904
{
@@ -1954,6 +1958,43 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
19541958
outer_null_expr);
19551959
nullable_partexpr = list_concat(nullable_partexpr,
19561960
inner_null_expr);
1961+
1962+
/*
1963+
* Also add CoalesceExprs corresponding to each possible
1964+
* full-join output variable (that is, left side coalesced to
1965+
* right side), so that we can match equijoin expressions
1966+
* using those variables. We really only need these for
1967+
* columns merged by JOIN USING, and only with the pairs of
1968+
* input items that correspond to the data structures that
1969+
* parse analysis would build for such variables. But it's
1970+
* hard to tell which those are, so just make all the pairs.
1971+
* Extra items in the nullable_partexprs list won't cause big
1972+
* problems. (It's possible that such items will get matched
1973+
* to user-written COALESCEs, but it should still be valid to
1974+
* partition on those, since they're going to be either the
1975+
* partition column or NULL; it's the same argument as for
1976+
* partitionwise nesting of any outer join.) We assume no
1977+
* type coercions are needed to make the coalesce expressions,
1978+
* since columns of different types won't have gotten
1979+
* classified as the same PartitionScheme.
1980+
*/
1981+
foreach(lc, list_concat_copy(outer_expr, outer_null_expr))
1982+
{
1983+
Node *larg = (Node *) lfirst(lc);
1984+
ListCell *lc2;
1985+
1986+
foreach(lc2, list_concat_copy(inner_expr, inner_null_expr))
1987+
{
1988+
Node *rarg = (Node *) lfirst(lc2);
1989+
CoalesceExpr *c = makeNode(CoalesceExpr);
1990+
1991+
c->coalescetype = exprType(larg);
1992+
c->coalescecollid = exprCollation(larg);
1993+
c->args = list_make2(larg, rarg);
1994+
c->location = -1;
1995+
nullable_partexpr = lappend(nullable_partexpr, c);
1996+
}
1997+
}
19571998
break;
19581999

19592000
default:

Diff for: src/include/nodes/pathnodes.h

+3
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,9 @@ typedef struct PartitionSchemeData *PartitionScheme;
615615
* that expression goes in the partexprs[i] list if the base relation
616616
* is not nullable by this join or any lower outer join, or in the
617617
* nullable_partexprs[i] list if the base relation is nullable.
618+
* Furthermore, FULL JOINs add extra nullable_partexprs expressions
619+
* corresponding to COALESCE expressions of the left and right join columns,
620+
* to simplify matching join clauses to those lists.
618621
*----------
619622
*/
620623
typedef enum RelOptKind

Diff for: src/test/regress/expected/partition_join.out

+110
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,116 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
750750
550 | 0550 | | | 1100 | 0
751751
(12 rows)
752752

753+
--
754+
-- 3-way full join
755+
--
756+
EXPLAIN (COSTS OFF)
757+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
758+
WHERE a BETWEEN 490 AND 510;
759+
QUERY PLAN
760+
-----------------------------------------------------------------------------------------------------------------------------------------
761+
Aggregate
762+
-> Append
763+
-> Hash Full Join
764+
Hash Cond: ((COALESCE(prt1_1.a, p2_1.a) = p3_1.a) AND (COALESCE(prt1_1.b, p2_1.b) = p3_1.b))
765+
Filter: ((COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a) >= 490) AND (COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a) <= 510))
766+
-> Hash Full Join
767+
Hash Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
768+
-> Seq Scan on prt1_p1 prt1_1
769+
-> Hash
770+
-> Seq Scan on prt2_p1 p2_1
771+
-> Hash
772+
-> Seq Scan on prt2_p1 p3_1
773+
-> Hash Full Join
774+
Hash Cond: ((COALESCE(prt1_2.a, p2_2.a) = p3_2.a) AND (COALESCE(prt1_2.b, p2_2.b) = p3_2.b))
775+
Filter: ((COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a) >= 490) AND (COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a) <= 510))
776+
-> Hash Full Join
777+
Hash Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
778+
-> Seq Scan on prt1_p2 prt1_2
779+
-> Hash
780+
-> Seq Scan on prt2_p2 p2_2
781+
-> Hash
782+
-> Seq Scan on prt2_p2 p3_2
783+
-> Hash Full Join
784+
Hash Cond: ((COALESCE(prt1_3.a, p2_3.a) = p3_3.a) AND (COALESCE(prt1_3.b, p2_3.b) = p3_3.b))
785+
Filter: ((COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a) >= 490) AND (COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a) <= 510))
786+
-> Hash Full Join
787+
Hash Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
788+
-> Seq Scan on prt1_p3 prt1_3
789+
-> Hash
790+
-> Seq Scan on prt2_p3 p2_3
791+
-> Hash
792+
-> Seq Scan on prt2_p3 p3_3
793+
(32 rows)
794+
795+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
796+
WHERE a BETWEEN 490 AND 510;
797+
count
798+
-------
799+
14
800+
(1 row)
801+
802+
--
803+
-- 4-way full join
804+
--
805+
EXPLAIN (COSTS OFF)
806+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4 (a,b,c) USING (a, b)
807+
WHERE a BETWEEN 490 AND 510;
808+
QUERY PLAN
809+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
810+
Aggregate
811+
-> Append
812+
-> Hash Full Join
813+
Hash Cond: ((COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a) = p4_1.a) AND (COALESCE(COALESCE(prt1_1.b, p2_1.b), p3_1.b) = p4_1.b))
814+
Filter: ((COALESCE(COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a), p4_1.a) >= 490) AND (COALESCE(COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a), p4_1.a) <= 510))
815+
-> Hash Full Join
816+
Hash Cond: ((COALESCE(prt1_1.a, p2_1.a) = p3_1.a) AND (COALESCE(prt1_1.b, p2_1.b) = p3_1.b))
817+
-> Hash Full Join
818+
Hash Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
819+
-> Seq Scan on prt1_p1 prt1_1
820+
-> Hash
821+
-> Seq Scan on prt2_p1 p2_1
822+
-> Hash
823+
-> Seq Scan on prt2_p1 p3_1
824+
-> Hash
825+
-> Seq Scan on prt1_p1 p4_1
826+
-> Hash Full Join
827+
Hash Cond: ((COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a) = p4_2.a) AND (COALESCE(COALESCE(prt1_2.b, p2_2.b), p3_2.b) = p4_2.b))
828+
Filter: ((COALESCE(COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a), p4_2.a) >= 490) AND (COALESCE(COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a), p4_2.a) <= 510))
829+
-> Hash Full Join
830+
Hash Cond: ((COALESCE(prt1_2.a, p2_2.a) = p3_2.a) AND (COALESCE(prt1_2.b, p2_2.b) = p3_2.b))
831+
-> Hash Full Join
832+
Hash Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
833+
-> Seq Scan on prt1_p2 prt1_2
834+
-> Hash
835+
-> Seq Scan on prt2_p2 p2_2
836+
-> Hash
837+
-> Seq Scan on prt2_p2 p3_2
838+
-> Hash
839+
-> Seq Scan on prt1_p2 p4_2
840+
-> Hash Full Join
841+
Hash Cond: ((COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a) = p4_3.a) AND (COALESCE(COALESCE(prt1_3.b, p2_3.b), p3_3.b) = p4_3.b))
842+
Filter: ((COALESCE(COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a), p4_3.a) >= 490) AND (COALESCE(COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a), p4_3.a) <= 510))
843+
-> Hash Full Join
844+
Hash Cond: ((COALESCE(prt1_3.a, p2_3.a) = p3_3.a) AND (COALESCE(prt1_3.b, p2_3.b) = p3_3.b))
845+
-> Hash Full Join
846+
Hash Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
847+
-> Seq Scan on prt1_p3 prt1_3
848+
-> Hash
849+
-> Seq Scan on prt2_p3 p2_3
850+
-> Hash
851+
-> Seq Scan on prt2_p3 p3_3
852+
-> Hash
853+
-> Seq Scan on prt1_p3 p4_3
854+
(44 rows)
855+
856+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4 (a,b,c) USING (a, b)
857+
WHERE a BETWEEN 490 AND 510;
858+
count
859+
-------
860+
14
861+
(1 row)
862+
753863
-- Cases with non-nullable expressions in subquery results;
754864
-- make sure these go to null as expected
755865
EXPLAIN (COSTS OFF)

Diff for: src/test/regress/sql/partition_join.sql

+18
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,24 @@ EXPLAIN (COSTS OFF)
145145
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
146146
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
147147

148+
--
149+
-- 3-way full join
150+
--
151+
EXPLAIN (COSTS OFF)
152+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
153+
WHERE a BETWEEN 490 AND 510;
154+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
155+
WHERE a BETWEEN 490 AND 510;
156+
157+
--
158+
-- 4-way full join
159+
--
160+
EXPLAIN (COSTS OFF)
161+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4 (a,b,c) USING (a, b)
162+
WHERE a BETWEEN 490 AND 510;
163+
SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4 (a,b,c) USING (a, b)
164+
WHERE a BETWEEN 490 AND 510;
165+
148166
-- Cases with non-nullable expressions in subquery results;
149167
-- make sure these go to null as expected
150168
EXPLAIN (COSTS OFF)

0 commit comments

Comments
 (0)