diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 89697f23bf1f..f6e85a5fe7c9 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -112,6 +112,7 @@ typedef struct decorate_subplans_with_motions_context /* Current position in the tree. */ int sliceDepth; Flow *currentPlanFlow; + bool shouldOmitMaterial; } decorate_subplans_with_motions_context; /* State for the recursive build_slice_table() function. */ @@ -725,6 +726,7 @@ cdbllize_decorate_subplans_with_motions(PlannerInfo *root, Plan *plan) planner_init_plan_tree_base(&context.base, root); context.sliceDepth = 0; context.subplan_workingQueue = NIL; + context.shouldOmitMaterial = false; nsubplans = list_length(root->glob->subplans); context.subplans = (decorate_subplan_info *) @@ -989,10 +991,10 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte * For non-top slice, if this motion is QE singleton and subplan's locus * is CdbLocusType_SegmentGeneral, omit this motion. */ - shouldOmit |= context->sliceDepth > 0 && - context->currentPlanFlow->flotype == FLOW_SINGLETON && + shouldOmit |= context->currentPlanFlow->flotype == FLOW_SINGLETON && context->currentPlanFlow->segindex == 0 && - motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral; + (motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral || + motion->plan.lefttree->flow->locustype == CdbLocusType_SingleQE); if (shouldOmit) { @@ -1006,6 +1008,7 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte child->initPlan = list_concat(child->initPlan, motion->plan.initPlan); newnode = (Node *) child; + context->shouldOmitMaterial = true; } else { @@ -1023,6 +1026,16 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte if (plan->flow != NULL && plan->flow->locustype != CdbLocusType_OuterQuery) context->currentPlanFlow = plan->flow; newnode = plan_tree_mutator(node, fix_outer_query_motions_mutator, context, false); + + /* If the underlying node was Motion, then omit Matierilze */ + if (IsA(newnode, Material) && context->shouldOmitMaterial) + { + Plan *materialPlan = (Plan *) newnode; + materialPlan->initPlan = list_concat(materialPlan->initPlan, materialPlan->lefttree->initPlan); + materialPlan = materialPlan->lefttree; + newnode = (Node *) materialPlan; + } + context->shouldOmitMaterial = false; context->currentPlanFlow = saveCurrentPlanFlow; } diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 7ed72f8cb8e4..c0af2402abf6 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -2362,6 +2362,15 @@ try_redistribute(PlannerInfo *root, CdbpathMfjRel *g, CdbpathMfjRel *o, return false; } +Path * +cdbpath_create_motion_to_outer_query(PlannerInfo *root, + Path *subpath) +{ + CdbPathLocus outerQueryLocus; + CdbPathLocus_MakeOuterQuery(&outerQueryLocus); + return (Path *) make_motion_path(root, subpath, outerQueryLocus, false, NULL); +} + /* * Add a suitable Motion Path so that the input tuples from 'subpath' are * distributed correctly for insertion into target table. diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 8f63290f1650..d5fa6f454ca1 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -522,9 +522,21 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, List *outer_quals) Path *path; CdbPathLocus outerquery_locus; - if (CdbPathLocus_IsGeneral(origpath->locus) || - CdbPathLocus_IsOuterQuery(origpath->locus)) + /* + * We can change the locus and add Motion here if we need OuterQuery. + * However, if there is a volatile function in TL, we should do this + * later. The reason for this is that the volatile function in this + * case can be in the Result node (for each segment). We want the + * volatile function to be executed once if possible. So, the locus + * change and Motion addition occurs later after the scan/join path + * is generated (see cdbpath_create_motion_to_outer_query()). + */ + if (CdbPathLocus_IsGeneral(origpath->locus) || CdbPathLocus_IsOuterQuery(origpath->locus) || + ((CdbPathLocus_IsSegmentGeneral(origpath->locus) || CdbPathLocus_IsSingleQE(origpath->locus)) + && contain_volatile_functions((Node *) root->processed_tlist))) + { path = origpath; + } else { /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 86f04eef208e..56f5a7a79035 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2600,6 +2600,42 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target_parallel_safe, scanjoin_target_same_exprs); + /* + * If the TL of the subquery contains a volatile function and the data is available + * on all segments, we should change the path locus to SingleQE in order to get a + * single dataset on all segments. We do not take this into account if the final + * locus is Replicated (this case is processed later). + */ + if (contain_volatile_functions((Node *) scanjoin_target->exprs) && !CdbPathLocus_IsReplicated(root->final_locus)) + { + foreach(lc, current_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + if (CdbPathLocus_IsGeneral(path->locus) || CdbPathLocus_IsSegmentGeneral(path->locus)) + { + CdbPathLocus_MakeSingleQE(&(path->locus), getgpsegmentCount()); + } + } + } + /* + * If the subquery contains parameterized operators (correlated), the locus should be + * changed to OuterQuery. We do it here, instead of bring_to_outer_query(). + */ + if (root->is_correlated_subplan && !CdbPathLocus_IsReplicated(root->final_locus)) + { + foreach(lc, current_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (CdbPathLocus_IsSingleQE(path->locus)) + { + Path *motion_path = cdbpath_create_motion_to_outer_query(root, path); + Path *material_path = (Path *) create_material_path(root, motion_path->parent, motion_path); + lfirst(lc) = material_path; + } + } + set_cheapest(current_rel); + } /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it diff --git a/src/include/cdb/cdbpath.h b/src/include/cdb/cdbpath.h index ed7e7fe8fe71..82231f8b102e 100644 --- a/src/include/cdb/cdbpath.h +++ b/src/include/cdb/cdbpath.h @@ -40,6 +40,9 @@ extern Path *cdbpath_create_redistribute_motion_path_for_exprs(PlannerInfo *root List *hashExprs, List *hashFamilies); +extern Path *cdbpath_create_motion_to_outer_query(PlannerInfo *root, + Path *subpath); + extern Path *create_motion_path_for_ctas(PlannerInfo *root, GpPolicy *policy, Path *subpath); extern Path *create_motion_path_for_insert(PlannerInfo *root, GpPolicy *targetPolicy, Path *subpath); extern Path *create_motion_path_for_upddel(PlannerInfo *root, Index rti, GpPolicy *targetPolicy, Path *subpath); diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out index ab25b7227536..2dd423eafbd8 100644 --- a/src/test/regress/expected/limit.out +++ b/src/test/regress/expected/limit.out @@ -369,3 +369,39 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 45020 | 45020 (3 rows) +-- Check the operation of the parameterized Limit in a subquery with a volatile function +create table limit_tbl(i int) distributed by (i); +insert into limit_tbl select * from generate_series(1, 3) i; +create function f(i int) returns int language plpgsql as $$ begin return i; end; $$; +explain (verbose, costs off) +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: ((SubPlan 1)) + -> Seq Scan on public.limit_tbl + Output: (SubPlan 1) + SubPlan 1 + -> Limit + Output: (f(a.a)) + -> Materialize + Output: (f(a.a)) + -> Broadcast Motion 1:3 (slice2; segments: 1) + Output: (f(a.a)) + -> Function Scan on pg_catalog.generate_series a + Output: f(a.a) + Function Call: generate_series(1, 4) + Optimizer: Postgres-based planner + Settings: optimizer = 'off' +(16 rows) + +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + r +--- + 3 + 4 + 2 +(3 rows) + +drop function f(int); +drop table limit_tbl; diff --git a/src/test/regress/expected/limit_optimizer.out b/src/test/regress/expected/limit_optimizer.out index cf75089ed712..a0ed54796de0 100644 --- a/src/test/regress/expected/limit_optimizer.out +++ b/src/test/regress/expected/limit_optimizer.out @@ -393,3 +393,38 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 45020 | 45020 (3 rows) +-- Check the operation of the parameterized Limit in a subquery with a volatile function +create table limit_tbl(i int) distributed by (i); +insert into limit_tbl select * from generate_series(1, 3) i; +create function f(i int) returns int language plpgsql as $$ begin return i; end; $$; +explain (verbose, costs off) +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + QUERY PLAN +----------------------------------------------------------------- + Result + Output: (SubPlan 1) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: limit_tbl.i + -> Seq Scan on public.limit_tbl + Output: limit_tbl.i + SubPlan 1 + -> Result + Output: f(generate_series.generate_series) + -> Limit + Output: generate_series.generate_series + -> Function Scan on pg_catalog.generate_series + Output: generate_series.generate_series + Function Call: generate_series(1, 4) + Optimizer: GPORCA +(15 rows) + +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + r +--- + 2 + 3 + 4 +(3 rows) + +drop function f(int); +drop table limit_tbl; diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index 11f573b7d7c0..bad68177d1c2 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -781,12 +781,10 @@ explain (costs off) select * from t_hashdist where a > All (select random() from -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on t_hashdist -> Materialize - -> Result - -> Gather Motion 1:1 (slice2; segments: 1) - -> Subquery Scan on "NotIn_SUBQUERY" - -> Seq Scan on t_replicate_volatile + -> Subquery Scan on "NotIn_SUBQUERY" + -> Seq Scan on t_replicate_volatile Optimizer: Postgres query optimizer -(10 rows) +(8 rows) explain (costs off) select * from t_hashdist where a in (select random()::int from t_replicate_volatile); QUERY PLAN @@ -799,11 +797,10 @@ explain (costs off) select * from t_hashdist where a in (select random()::int fr -> Redistribute Motion 1:3 (slice2; segments: 1) Hash Key: ((random())::integer) -> HashAggregate - Group Key: ((random())::integer) - -> Result - -> Seq Scan on t_replicate_volatile + Group Key: (random())::integer + -> Seq Scan on t_replicate_volatile Optimizer: Postgres query optimizer -(12 rows) +(11 rows) -- subplan explain (costs off, verbose) select * from t_hashdist left join t_replicate_volatile on t_hashdist.a > any (select random() from t_replicate_volatile); @@ -819,7 +816,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola SubPlan 1 -> Materialize Output: (random()) - -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Broadcast Motion 1:3 (slice2) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 Output: random() @@ -839,11 +836,9 @@ explain (costs off) select * from t_hashdist cross join (select random () from t -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on t_hashdist -> Materialize - -> Result - -> Gather Motion 1:1 (slice2; segments: 1) - -> Seq Scan on t_replicate_volatile - Optimizer: Postgres query optimizer -(8 rows) + -> Seq Scan on t_replicate_volatile + Optimizer: Postgres-based planner +(6 rows) explain (costs off) select * from t_hashdist cross join (select a, sum(random()) from t_replicate_volatile group by a) x; QUERY PLAN @@ -869,15 +864,13 @@ explain (costs off) select * from t_hashdist cross join (select random() as k, s -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on t_hashdist -> Materialize - -> Result - -> Gather Motion 1:1 (slice2; segments: 1) - -> GroupAggregate - Group Key: (random()) - -> Sort - Sort Key: (random()) - -> Seq Scan on t_replicate_volatile + -> GroupAggregate + Group Key: (random()) + -> Sort + Sort Key: (random()) + -> Seq Scan on t_replicate_volatile Optimizer: Postgres query optimizer -(12 rows) +(10 rows) explain (costs off) select * from t_hashdist cross join (select a, sum(b) as s from t_replicate_volatile group by a having sum(b) > random() order by a) x ; QUERY PLAN @@ -903,7 +896,7 @@ explain (costs off) insert into t_replicate_volatile select random() from t_repl --------------------------------------------------------------------------- Insert on t_replicate_volatile -> Broadcast Motion 1:3 (slice1; segments: 1) - -> Result + -> Subquery Scan on "*SELECT*" -> Seq Scan on t_replicate_volatile t_replicate_volatile_1 Optimizer: Postgres query optimizer (5 rows) @@ -1297,24 +1290,20 @@ select * from cte join (select * from t1 join cte using(a)) b using(a); -> Redistribute Motion 1:3 (slice3; segments: 1) Output: share0_ref2.a Hash Key: share0_ref2.a - -> Result + -> Shared Scan (share slice:id 3:0) Output: share0_ref2.a - -> Shared Scan (share slice:id 3:0) - Output: share0_ref2.a -> Hash Output: share0_ref1.a -> Redistribute Motion 1:3 (slice4; segments: 1) Output: share0_ref1.a Hash Key: share0_ref1.a - -> Result + -> Shared Scan (share slice:id 4:0) Output: share0_ref1.a - -> Shared Scan (share slice:id 4:0) - Output: share0_ref1.a - -> Seq Scan on rpt.t2 - Output: ((t2.a)::double precision * random()) + -> Seq Scan on rpt.t2 + Output: ((t2.a)::double precision * random()) Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', gp_cte_sharing = 'on', optimizer = 'off' -(35 rows) +(31 rows) explain (costs off, verbose) with cte as ( select a, count(*) from t2 group by a having count(*) > random() @@ -1384,13 +1373,11 @@ select * from cte join t1 using(a); -> Redistribute Motion 1:3 (slice2; segments: 1) Output: t2.a, (((t2.a)::double precision * random())) Hash Key: t2.a - -> Result - Output: t2.a, (((t2.a)::double precision * random())) - -> Seq Scan on rpt.t2 - Output: t2.a, ((t2.a)::double precision * random()) + -> Seq Scan on rpt.t2 + Output: t2.a, ((t2.a)::double precision * random()) Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off' -(18 rows) +(16 rows) explain (costs off, verbose) with cte as ( select a, count(*) from t2 group by a having count(*) > random() @@ -1448,10 +1435,10 @@ explain (costs off, verbose) select * from ( Hash Key: (count(*)) -> Aggregate Output: count(*) - -> Result - -> Table Function Scan on pg_catalog.anytable_out - -> Seq Scan on rpt.t2 - Output: (random())::integer + -> Table Function Scan on pg_catalog.anytable_out + Output: anytable_out + -> Seq Scan on rpt.t2 + Output: (random())::integer Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off' (21 rows) @@ -1522,17 +1509,13 @@ a join t_hashdist on a.a = t_hashdist.a; Output: (random()) Group Key: (random()) -> Append - -> Result - Output: (random()) - -> Seq Scan on rpt.t2 - Output: random() - -> Result - Output: (random()) - -> Seq Scan on rpt.t2 t2_1 - Output: random() + -> Seq Scan on rpt.t2 + Output: random() + -> Seq Scan on rpt.t2 t2_1 + Output: random() Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off' -(30 rows) +(26 rows) explain (costs off, verbose) select * from ( select a, count(*) from t2 group by a having count(*) > random() diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index b532e888e877..5e9f6eb28e74 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -811,7 +811,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola SubPlan 1 -> Materialize Output: (random()) - -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Broadcast Motion 1:3 (slice2) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 Output: random() @@ -1390,13 +1390,11 @@ select * from cte join t1 using(a); -> Redistribute Motion 1:3 (slice2; segments: 1) Output: t2.a, (((t2.a)::double precision * random())) Hash Key: t2.a - -> Result - Output: t2.a, (((t2.a)::double precision * random())) - -> Seq Scan on rpt.t2 - Output: t2.a, ((t2.a)::double precision * random()) + -> Seq Scan on rpt.t2 + Output: t2.a, ((t2.a)::double precision * random()) Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off' -(18 rows) +(16 rows) explain (costs off, verbose) with cte as ( select a, count(*) from t2 group by a having count(*) > random() @@ -1454,10 +1452,10 @@ explain (costs off, verbose) select * from ( Hash Key: (count(*)) -> Aggregate Output: count(*) - -> Result - -> Table Function Scan on pg_catalog.anytable_out - -> Seq Scan on rpt.t2 - Output: (random())::integer + -> Table Function Scan on pg_catalog.anytable_out + Output: anytable_out + -> Seq Scan on rpt.t2 + Output: (random())::integer Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off' (21 rows) diff --git a/src/test/regress/expected/with_clause.out b/src/test/regress/expected/with_clause.out index 1f6b51dd1c30..4b6db572b865 100644 --- a/src/test/regress/expected/with_clause.out +++ b/src/test/regress/expected/with_clause.out @@ -3833,8 +3833,8 @@ with cte as ( ) select * from t1 where t1.i in (select i from cte where cte.i = t1.j) order by 1; - QUERY PLAN --------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: t1.i -> Sort @@ -3844,13 +3844,12 @@ order by 1; SubPlan 1 -> Result Filter: (cte.i = t1.j) - -> Materialize - -> Subquery Scan on cte - -> Shared Scan (share slice:id 1:0) - -> Insert on with_dml_dr - -> Function Scan on generate_series i + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i Optimizer: Postgres-based planner -(15 rows) +(14 rows) with cte as ( insert into with_dml_dr diff --git a/src/test/regress/expected/with_clause_optimizer.out b/src/test/regress/expected/with_clause_optimizer.out index ed6ce8a5e3a1..63a28162fcf7 100644 --- a/src/test/regress/expected/with_clause_optimizer.out +++ b/src/test/regress/expected/with_clause_optimizer.out @@ -3842,8 +3842,8 @@ with cte as ( ) select * from t1 where t1.i in (select i from cte where cte.i = t1.j) order by 1; - QUERY PLAN --------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: t1.i -> Sort @@ -3853,13 +3853,12 @@ order by 1; SubPlan 1 -> Result Filter: (cte.i = t1.j) - -> Materialize - -> Subquery Scan on cte - -> Shared Scan (share slice:id 1:0) - -> Insert on with_dml_dr - -> Function Scan on generate_series i + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i Optimizer: Postgres-based planner -(15 rows) +(14 rows) with cte as ( insert into with_dml_dr diff --git a/src/test/regress/sql/limit.sql b/src/test/regress/sql/limit.sql index 2833ae86b4c9..5cdfd0104c11 100644 --- a/src/test/regress/sql/limit.sql +++ b/src/test/regress/sql/limit.sql @@ -147,3 +147,19 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 from tenk1 group by thousand order by thousand limit 3; + +-- Check the operation of the parameterized Limit in a subquery with a volatile function +-- start_ignore +drop table if exists limit_tbl; +drop function if exists f(int); +-- end_ignore +create table limit_tbl(i int) distributed by (i); +insert into limit_tbl select * from generate_series(1, 3) i; +create function f(i int) returns int language plpgsql as $$ begin return i; end; $$; + +explain (verbose, costs off) +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + +drop function f(int); +drop table limit_tbl;