From 28837554e4efa4d87093294ffed4808155ff7616 Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Mon, 14 Apr 2025 23:55:52 +0300 Subject: [PATCH 01/11] Add support for parameterized clauses in a subplan with a volatile function --- src/backend/optimizer/plan/planner.c | 46 +++++++++++++++++++ src/test/regress/expected/explain_analyze.out | 18 +------- src/test/regress/expected/limit.out | 36 +++++++++++++++ src/test/regress/expected/limit_optimizer.out | 35 ++++++++++++++ src/test/regress/expected/subselect.out | 14 +----- src/test/regress/sql/limit.sql | 16 +++++++ 6 files changed, 135 insertions(+), 30 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 63d8f5e318fb..a44af1677139 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2602,6 +2602,52 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target_parallel_safe, scanjoin_target_same_exprs); + if (contain_volatile_functions((Node *) scanjoin_target->exprs) && !CdbPathLocus_IsReplicated(root->final_locus)) + { + foreach(lc, current_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + if (CdbPathLocus_IsGeneral(path->locus)) + { + CdbPathLocus_MakeSingleQE(&(path->locus), getgpsegmentCount()); + } + } + } + if (root->is_correlated_subplan && !CdbPathLocus_IsReplicated(root->final_locus)) + { + foreach(lc, current_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (CdbPathLocus_IsSingleQE(path->locus)) + { + CdbMotionPath *motion_path; + + motion_path = makeNode(CdbMotionPath); + motion_path->path.pathtype = T_Motion; + motion_path->path.parent = path->parent; + motion_path->path.pathtarget = path->pathtarget; + motion_path->path.rows = path->rows; + motion_path->path.parallel_aware = false; + motion_path->path.parallel_safe = path->parallel_safe; + motion_path->path.parallel_workers = path->parallel_workers; + motion_path->path.pathkeys = NIL; + motion_path->subpath = 
path; + /* Costs, etc, are same as subpath. */ + motion_path->path.startup_cost = path->total_cost; + motion_path->path.total_cost = path->total_cost; + motion_path->path.memory = path->memory; + motion_path->path.motionHazard = path->motionHazard; + /* Motion nodes are never rescannable. */ + motion_path->path.rescannable = false; + CdbPathLocus_MakeOuterQuery(&motion_path->path.locus); + + Path *mpath = (Path *) create_material_path(root, motion_path->path.parent, &motion_path->path); + lfirst(lc) = mpath; + } + } + set_cheapest(current_rel); + } /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it diff --git a/src/test/regress/expected/explain_analyze.out b/src/test/regress/expected/explain_analyze.out index de9745e81b6c..d7a1510d890a 100644 --- a/src/test/regress/expected/explain_analyze.out +++ b/src/test/regress/expected/explain_analyze.out @@ -132,23 +132,7 @@ explain (analyze, timing off, costs off) select a.i from (select x::int as i, x::int / 5 as j from round(random() / 5) as x) a where a.j = (select round(random() / 5)::int where a.i = 0) and a.i = a.j; - QUERY PLAN ---------------------------------------------------------------------------------- - Function Scan on round x (actual rows=1 loops=1) - Filter: (((SubPlan 1) = (x)::integer) AND (((x)::integer / 5) = (SubPlan 1))) - InitPlan 2 (returns $1) - -> Result (actual rows=1 loops=1) - SubPlan 1 - -> Result (actual rows=1 loops=2) - One-Time Filter: ((x.x)::integer = 0) - Optimizer: Postgres-based planner - Planning Time: 0.245 ms - (slice0) Executor memory: 19K bytes. - (slice1) Executor memory: 36K bytes. Work_mem: 17K bytes max. - Memory used: 128000kB - Execution Time: 0.082 ms -(14 rows) - +ERROR: Passing parameters across motion is not supported. 
(cdbmutate.c:2051) -- explain_processing_on drop table slice_test; drop table slice_test2; diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out index ab25b7227536..2dd423eafbd8 100644 --- a/src/test/regress/expected/limit.out +++ b/src/test/regress/expected/limit.out @@ -369,3 +369,39 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 45020 | 45020 (3 rows) +-- Check the operation of the parameterized Limit in a subquery with a volatile function +create table limit_tbl(i int) distributed by (i); +insert into limit_tbl select * from generate_series(1, 3) i; +create function f(i int) returns int language plpgsql as $$ begin return i; end; $$; +explain (verbose, costs off) +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: ((SubPlan 1)) + -> Seq Scan on public.limit_tbl + Output: (SubPlan 1) + SubPlan 1 + -> Limit + Output: (f(a.a)) + -> Materialize + Output: (f(a.a)) + -> Broadcast Motion 1:3 (slice2; segments: 1) + Output: (f(a.a)) + -> Function Scan on pg_catalog.generate_series a + Output: f(a.a) + Function Call: generate_series(1, 4) + Optimizer: Postgres-based planner + Settings: optimizer = 'off' +(16 rows) + +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + r +--- + 3 + 4 + 2 +(3 rows) + +drop function f(int); +drop table limit_tbl; diff --git a/src/test/regress/expected/limit_optimizer.out b/src/test/regress/expected/limit_optimizer.out index cf75089ed712..a0ed54796de0 100644 --- a/src/test/regress/expected/limit_optimizer.out +++ b/src/test/regress/expected/limit_optimizer.out @@ -393,3 +393,38 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 45020 | 45020 (3 rows) +-- Check the operation of the parameterized Limit in a subquery with a volatile function 
+create table limit_tbl(i int) distributed by (i); +insert into limit_tbl select * from generate_series(1, 3) i; +create function f(i int) returns int language plpgsql as $$ begin return i; end; $$; +explain (verbose, costs off) +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + QUERY PLAN +----------------------------------------------------------------- + Result + Output: (SubPlan 1) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: limit_tbl.i + -> Seq Scan on public.limit_tbl + Output: limit_tbl.i + SubPlan 1 + -> Result + Output: f(generate_series.generate_series) + -> Limit + Output: generate_series.generate_series + -> Function Scan on pg_catalog.generate_series + Output: generate_series.generate_series + Function Call: generate_series(1, 4) + Optimizer: GPORCA +(15 rows) + +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + r +--- + 2 + 3 + 4 +(3 rows) + +drop function f(int); +drop table limit_tbl; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 7b428887b826..fd2fd3a80afa 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1054,19 +1054,7 @@ explain (verbose, costs off) explain (verbose, costs off) select x, x from (select (select random() where y=y) as x from (values(1),(2)) v(y)) ss; - QUERY PLAN ----------------------------------------------------------------------------- - Subquery Scan on ss - Output: ss.x, ss.x - -> Values Scan on "*VALUES*" - Output: (SubPlan 1) - SubPlan 1 - -> Result - Output: random() - One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) - Optimizer: Postgres query optimizer -(9 rows) - +ERROR: Passing parameters across motion is not supported. 
(cdbmutate.c:2051) -- -- Test rescan of a hashed subplan (the use of random() is to prevent the -- sub-select from being pulled up, which would result in not hashing) diff --git a/src/test/regress/sql/limit.sql b/src/test/regress/sql/limit.sql index 2833ae86b4c9..5cdfd0104c11 100644 --- a/src/test/regress/sql/limit.sql +++ b/src/test/regress/sql/limit.sql @@ -147,3 +147,19 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 from tenk1 group by thousand order by thousand limit 3; + +-- Check the operation of the parameterized Limit in a subquery with a volatile function +-- start_ignore +drop table if exists limit_tbl; +drop function if exists f(int); +-- end_ignore +create table limit_tbl(i int) distributed by (i); +insert into limit_tbl select * from generate_series(1, 3) i; +create function f(i int) returns int language plpgsql as $$ begin return i; end; $$; + +explain (verbose, costs off) +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; +select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl; + +drop function f(int); +drop table limit_tbl; From f8a4d94fa66ebba5ea81a32df725bd0a9fc7535d Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Tue, 15 Apr 2025 09:08:45 +0300 Subject: [PATCH 02/11] Fix output of bad testcases --- .../expected/explain_analyze_optimizer.out | 18 +----------------- .../regress/expected/subselect_optimizer.out | 14 +------------- 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/src/test/regress/expected/explain_analyze_optimizer.out b/src/test/regress/expected/explain_analyze_optimizer.out index 6c8b05aba024..6a80cd8c045a 100644 --- a/src/test/regress/expected/explain_analyze_optimizer.out +++ b/src/test/regress/expected/explain_analyze_optimizer.out @@ -135,23 +135,7 @@ explain (analyze, timing off, costs off) select a.i from (select x::int as i, x::int / 5 as j 
from round(random() / 5) as x) a where a.j = (select round(random() / 5)::int where a.i = 0) and a.i = a.j; - QUERY PLAN ---------------------------------------------------------------------------------- - Function Scan on round x (actual rows=1 loops=1) - Filter: (((SubPlan 1) = (x)::integer) AND (((x)::integer / 5) = (SubPlan 1))) - InitPlan 2 (returns $1) - -> Result (actual rows=1 loops=1) - SubPlan 1 - -> Result (actual rows=1 loops=2) - One-Time Filter: ((x.x)::integer = 0) - Optimizer: Postgres-based planner - Planning Time: 2.920 ms - (slice0) Executor memory: 19K bytes. - (slice1) Executor memory: 36K bytes. Work_mem: 17K bytes max. - Memory used: 128000kB - Execution Time: 0.085 ms -(14 rows) - +ERROR: Passing parameters across motion is not supported. (cdbmutate.c:2051) -- explain_processing_on drop table slice_test; drop table slice_test2; diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index 72b0263c3c98..a5b361cf1401 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -1104,19 +1104,7 @@ explain (verbose, costs off) explain (verbose, costs off) select x, x from (select (select random() where y=y) as x from (values(1),(2)) v(y)) ss; - QUERY PLAN ----------------------------------------------------------------------------- - Subquery Scan on ss - Output: ss.x, ss.x - -> Values Scan on "*VALUES*" - Output: (SubPlan 1) - SubPlan 1 - -> Result - Output: random() - One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) - Optimizer: Postgres query optimizer -(9 rows) - +ERROR: Passing parameters across motion is not supported. 
(cdbmutate.c:2051) -- -- Test rescan of a hashed subplan (the use of random() is to prevent the -- sub-select from being pulled up, which would result in not hashing) From 05865254283b847bf90160731bab93dae665abed Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Wed, 16 Apr 2025 08:28:44 +0300 Subject: [PATCH 03/11] Fix adding Motion(1:1) and correct tests output --- src/backend/cdb/cdbllize.c | 6 +++--- src/test/regress/expected/explain_analyze.out | 19 ++++++++++++++++++- .../expected/explain_analyze_optimizer.out | 19 ++++++++++++++++++- src/test/regress/expected/subselect.out | 17 ++++++++++++++++- .../regress/expected/subselect_optimizer.out | 17 ++++++++++++++++- 5 files changed, 71 insertions(+), 7 deletions(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 89697f23bf1f..2d086e26dfa7 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -989,10 +989,10 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte * For non-top slice, if this motion is QE singleton and subplan's locus * is CdbLocusType_SegmentGeneral, omit this motion. 
*/ - shouldOmit |= context->sliceDepth > 0 && - context->currentPlanFlow->flotype == FLOW_SINGLETON && + shouldOmit |= context->currentPlanFlow->flotype == FLOW_SINGLETON && context->currentPlanFlow->segindex == 0 && - motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral; + (motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral || + motion->plan.lefttree->flow->locustype == CdbLocusType_SingleQE); if (shouldOmit) { diff --git a/src/test/regress/expected/explain_analyze.out b/src/test/regress/expected/explain_analyze.out index d7a1510d890a..d1e3fff4bad8 100644 --- a/src/test/regress/expected/explain_analyze.out +++ b/src/test/regress/expected/explain_analyze.out @@ -132,7 +132,24 @@ explain (analyze, timing off, costs off) select a.i from (select x::int as i, x::int / 5 as j from round(random() / 5) as x) a where a.j = (select round(random() / 5)::int where a.i = 0) and a.i = a.j; -ERROR: Passing parameters across motion is not supported. (cdbmutate.c:2051) + QUERY PLAN +--------------------------------------------------------------------------------- + Function Scan on round x (actual rows=1 loops=1) + Filter: (((SubPlan 1) = (x)::integer) AND (((x)::integer / 5) = (SubPlan 1))) + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + SubPlan 1 + -> Materialize (actual rows=1 loops=2) + -> Result (actual rows=1 loops=2) + One-Time Filter: ((x.x)::integer = 0) + Optimizer: Postgres-based planner + Planning Time: 0.259 ms + (slice0) Executor memory: 19K bytes. + (slice1) Executor memory: 52K bytes. Work_mem: 17K bytes max. 
+ Memory used: 256000kB + Execution Time: 0.112 ms +(14 rows) + -- explain_processing_on drop table slice_test; drop table slice_test2; diff --git a/src/test/regress/expected/explain_analyze_optimizer.out b/src/test/regress/expected/explain_analyze_optimizer.out index 6a80cd8c045a..588df77164c5 100644 --- a/src/test/regress/expected/explain_analyze_optimizer.out +++ b/src/test/regress/expected/explain_analyze_optimizer.out @@ -135,7 +135,24 @@ explain (analyze, timing off, costs off) select a.i from (select x::int as i, x::int / 5 as j from round(random() / 5) as x) a where a.j = (select round(random() / 5)::int where a.i = 0) and a.i = a.j; -ERROR: Passing parameters across motion is not supported. (cdbmutate.c:2051) + QUERY PLAN +--------------------------------------------------------------------------------- + Function Scan on round x (actual rows=1 loops=1) + Filter: (((SubPlan 1) = (x)::integer) AND (((x)::integer / 5) = (SubPlan 1))) + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + SubPlan 1 + -> Materialize (actual rows=1 loops=2) + -> Result (actual rows=1 loops=2) + One-Time Filter: ((x.x)::integer = 0) + Optimizer: Postgres-based planner + Planning Time: 0.259 ms + (slice0) Executor memory: 19K bytes. + (slice1) Executor memory: 52K bytes. Work_mem: 17K bytes max. + Memory used: 256000kB + Execution Time: 0.112 ms +(14 rows) + -- explain_processing_on drop table slice_test; drop table slice_test2; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index fd2fd3a80afa..cb11d6eec040 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1054,7 +1054,22 @@ explain (verbose, costs off) explain (verbose, costs off) select x, x from (select (select random() where y=y) as x from (values(1),(2)) v(y)) ss; -ERROR: Passing parameters across motion is not supported. 
(cdbmutate.c:2051) + QUERY PLAN +---------------------------------------------------------------------------------- + Subquery Scan on ss + Output: ss.x, ss.x + -> Values Scan on "*VALUES*" + Output: (SubPlan 1) + SubPlan 1 + -> Materialize + Output: (random()) + -> Result + Output: random() + One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) + Optimizer: Postgres-based planner + Settings: optimizer = 'off' +(12 rows) + -- -- Test rescan of a hashed subplan (the use of random() is to prevent the -- sub-select from being pulled up, which would result in not hashing) diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index a5b361cf1401..91e547a6d1ef 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -1104,7 +1104,22 @@ explain (verbose, costs off) explain (verbose, costs off) select x, x from (select (select random() where y=y) as x from (values(1),(2)) v(y)) ss; -ERROR: Passing parameters across motion is not supported. 
(cdbmutate.c:2051) + QUERY PLAN +---------------------------------------------------------------------------------- + Subquery Scan on ss + Output: ss.x, ss.x + -> Values Scan on "*VALUES*" + Output: (SubPlan 1) + SubPlan 1 + -> Materialize + Output: (random()) + -> Result + Output: random() + One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) + Optimizer: Postgres-based planner + Settings: optimizer = 'off' +(12 rows) + -- -- Test rescan of a hashed subplan (the use of random() is to prevent the -- sub-select from being pulled up, which would result in not hashing) From 00bb1b4e00a5550c044d411b8b6420373ccbd340 Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Sun, 20 Apr 2025 14:54:09 +0300 Subject: [PATCH 04/11] Consider the case with SegmentGeneral and check tests --- src/backend/optimizer/path/allpaths.c | 5 ++++- src/backend/optimizer/plan/planner.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 49316d3159c1..83bfcce4b988 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -523,8 +523,11 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, List *outer_quals) CdbPathLocus outerquery_locus; if (CdbPathLocus_IsGeneral(origpath->locus) || - CdbPathLocus_IsOuterQuery(origpath->locus)) + CdbPathLocus_IsOuterQuery(origpath->locus) || + contain_volatile_functions((Node *) root->processed_tlist)) + { path = origpath; + } else { /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a44af1677139..a87974503025 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2607,7 +2607,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, foreach(lc, current_rel->pathlist) { Path *path = (Path *) lfirst(lc); - if (CdbPathLocus_IsGeneral(path->locus)) + if (CdbPathLocus_IsGeneral(path->locus) || 
CdbPathLocus_IsSegmentGeneral(path->locus)) { CdbPathLocus_MakeSingleQE(&(path->locus), getgpsegmentCount()); } From 6ab3badd9cf8c265552219618742fd68b6b7506a Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Sun, 20 Apr 2025 23:13:20 +0300 Subject: [PATCH 05/11] Fix tests output --- src/test/regress/expected/gp_tsrf.out | 5 +- .../regress/expected/gp_tsrf_optimizer.out | 5 +- src/test/regress/expected/rpt.out | 81 ++++++++----------- src/test/regress/expected/rpt_optimizer.out | 18 ++--- 4 files changed, 42 insertions(+), 67 deletions(-) diff --git a/src/test/regress/expected/gp_tsrf.out b/src/test/regress/expected/gp_tsrf.out index c8c94fc134cb..e156fb4cdd13 100644 --- a/src/test/regress/expected/gp_tsrf.out +++ b/src/test/regress/expected/gp_tsrf.out @@ -354,10 +354,7 @@ insert into srf_test_t1 values ('ao1_srf_test'::regclass::oid), ('ao2_srf_test': select * from srf_test_t1 where a in (select (gp_toolkit.__gp_aoblkdir(srf_test_t1.a)).row_count from gp_dist_random('gp_id') limit 1); - a ---- -(0 rows) - +ERROR: Passing parameters across motion is not supported. (cdbmutate.c:2051) drop table ao1_srf_test; drop table ao2_srf_test; drop table srf_test_t1; diff --git a/src/test/regress/expected/gp_tsrf_optimizer.out b/src/test/regress/expected/gp_tsrf_optimizer.out index ce7c1147f4ff..e38a44d043a6 100644 --- a/src/test/regress/expected/gp_tsrf_optimizer.out +++ b/src/test/regress/expected/gp_tsrf_optimizer.out @@ -354,10 +354,7 @@ insert into srf_test_t1 values ('ao1_srf_test'::regclass::oid), ('ao2_srf_test': select * from srf_test_t1 where a in (select (gp_toolkit.__gp_aoblkdir(srf_test_t1.a)).row_count from gp_dist_random('gp_id') limit 1); - a ---- -(0 rows) - +ERROR: Passing parameters across motion is not supported. 
(cdbmutate.c:2051) drop table ao1_srf_test; drop table ao2_srf_test; drop table srf_test_t1; diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index 11f573b7d7c0..74f4a990d4cf 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -781,12 +781,10 @@ explain (costs off) select * from t_hashdist where a > All (select random() from -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on t_hashdist -> Materialize - -> Result - -> Gather Motion 1:1 (slice2; segments: 1) - -> Subquery Scan on "NotIn_SUBQUERY" - -> Seq Scan on t_replicate_volatile + -> Subquery Scan on "NotIn_SUBQUERY" + -> Seq Scan on t_replicate_volatile Optimizer: Postgres query optimizer -(10 rows) +(8 rows) explain (costs off) select * from t_hashdist where a in (select random()::int from t_replicate_volatile); QUERY PLAN @@ -800,10 +798,9 @@ explain (costs off) select * from t_hashdist where a in (select random()::int fr Hash Key: ((random())::integer) -> HashAggregate Group Key: ((random())::integer) - -> Result -> Seq Scan on t_replicate_volatile Optimizer: Postgres query optimizer -(12 rows) +(11 rows) -- subplan explain (costs off, verbose) select * from t_hashdist left join t_replicate_volatile on t_hashdist.a > any (select random() from t_replicate_volatile); @@ -819,7 +816,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola SubPlan 1 -> Materialize Output: (random()) - -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Broadcast Motion 1:3 (slice2) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 Output: random() @@ -839,11 +836,9 @@ explain (costs off) select * from t_hashdist cross join (select random () from t -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on t_hashdist -> Materialize - -> Result - -> Gather Motion 1:1 (slice2; segments: 1) - -> Seq Scan on t_replicate_volatile - Optimizer: Postgres query optimizer -(8 rows) + -> Seq Scan on 
t_replicate_volatile + Optimizer: Postgres-based planner +(6 rows) explain (costs off) select * from t_hashdist cross join (select a, sum(random()) from t_replicate_volatile group by a) x; QUERY PLAN @@ -869,15 +864,13 @@ explain (costs off) select * from t_hashdist cross join (select random() as k, s -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on t_hashdist -> Materialize - -> Result - -> Gather Motion 1:1 (slice2; segments: 1) - -> GroupAggregate - Group Key: (random()) - -> Sort - Sort Key: (random()) - -> Seq Scan on t_replicate_volatile + -> GroupAggregate + Group Key: (random()) + -> Sort + Sort Key: (random()) + -> Seq Scan on t_replicate_volatile Optimizer: Postgres query optimizer -(12 rows) +(10 rows) explain (costs off) select * from t_hashdist cross join (select a, sum(b) as s from t_replicate_volatile group by a having sum(b) > random() order by a) x ; QUERY PLAN @@ -903,7 +896,7 @@ explain (costs off) insert into t_replicate_volatile select random() from t_repl --------------------------------------------------------------------------- Insert on t_replicate_volatile -> Broadcast Motion 1:3 (slice1; segments: 1) - -> Result + -> Subquery Scan on "*SELECT*" -> Seq Scan on t_replicate_volatile t_replicate_volatile_1 Optimizer: Postgres query optimizer (5 rows) @@ -1297,24 +1290,20 @@ select * from cte join (select * from t1 join cte using(a)) b using(a); -> Redistribute Motion 1:3 (slice3; segments: 1) Output: share0_ref2.a Hash Key: share0_ref2.a - -> Result + -> Shared Scan (share slice:id 3:0) Output: share0_ref2.a - -> Shared Scan (share slice:id 3:0) - Output: share0_ref2.a -> Hash Output: share0_ref1.a -> Redistribute Motion 1:3 (slice4; segments: 1) Output: share0_ref1.a Hash Key: share0_ref1.a - -> Result + -> Shared Scan (share slice:id 4:0) Output: share0_ref1.a - -> Shared Scan (share slice:id 4:0) - Output: share0_ref1.a - -> Seq Scan on rpt.t2 - Output: ((t2.a)::double precision * random()) + -> Seq Scan on rpt.t2 + Output: 
((t2.a)::double precision * random()) Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', gp_cte_sharing = 'on', optimizer = 'off' -(35 rows) +(31 rows) explain (costs off, verbose) with cte as ( select a, count(*) from t2 group by a having count(*) > random() @@ -1384,13 +1373,11 @@ select * from cte join t1 using(a); -> Redistribute Motion 1:3 (slice2; segments: 1) Output: t2.a, (((t2.a)::double precision * random())) Hash Key: t2.a - -> Result - Output: t2.a, (((t2.a)::double precision * random())) - -> Seq Scan on rpt.t2 - Output: t2.a, ((t2.a)::double precision * random()) + -> Seq Scan on rpt.t2 + Output: t2.a, ((t2.a)::double precision * random()) Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off' -(18 rows) +(16 rows) explain (costs off, verbose) with cte as ( select a, count(*) from t2 group by a having count(*) > random() @@ -1448,10 +1435,10 @@ explain (costs off, verbose) select * from ( Hash Key: (count(*)) -> Aggregate Output: count(*) - -> Result - -> Table Function Scan on pg_catalog.anytable_out - -> Seq Scan on rpt.t2 - Output: (random())::integer + -> Table Function Scan on pg_catalog.anytable_out + Output: anytable_out + -> Seq Scan on rpt.t2 + Output: (random())::integer Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off' (21 rows) @@ -1522,17 +1509,13 @@ a join t_hashdist on a.a = t_hashdist.a; Output: (random()) Group Key: (random()) -> Append - -> Result - Output: (random()) - -> Seq Scan on rpt.t2 - Output: random() - -> Result - Output: (random()) - -> Seq Scan on rpt.t2 t2_1 - Output: random() + -> Seq Scan on rpt.t2 + Output: random() + -> Seq Scan on rpt.t2 t2_1 + Output: random() Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off' -(30 rows) +(26 rows) explain (costs off, verbose) select * from ( select a, 
count(*) from t2 group by a having count(*) > random() diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index b532e888e877..5e9f6eb28e74 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -811,7 +811,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola SubPlan 1 -> Materialize Output: (random()) - -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Broadcast Motion 1:3 (slice2) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 Output: random() @@ -1390,13 +1390,11 @@ select * from cte join t1 using(a); -> Redistribute Motion 1:3 (slice2; segments: 1) Output: t2.a, (((t2.a)::double precision * random())) Hash Key: t2.a - -> Result - Output: t2.a, (((t2.a)::double precision * random())) - -> Seq Scan on rpt.t2 - Output: t2.a, ((t2.a)::double precision * random()) + -> Seq Scan on rpt.t2 + Output: t2.a, ((t2.a)::double precision * random()) Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off' -(18 rows) +(16 rows) explain (costs off, verbose) with cte as ( select a, count(*) from t2 group by a having count(*) > random() @@ -1454,10 +1452,10 @@ explain (costs off, verbose) select * from ( Hash Key: (count(*)) -> Aggregate Output: count(*) - -> Result - -> Table Function Scan on pg_catalog.anytable_out - -> Seq Scan on rpt.t2 - Output: (random())::integer + -> Table Function Scan on pg_catalog.anytable_out + Output: anytable_out + -> Seq Scan on rpt.t2 + Output: (random())::integer Optimizer: Postgres-based planner Settings: enable_bitmapscan = 'off', enable_seqscan = 'off' (21 rows) From 37994acc514ee83c5e0dc869d1aabf166aacb4db Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Mon, 21 Apr 2025 09:22:10 +0300 Subject: [PATCH 06/11] Fix rpt.out --- src/test/regress/expected/rpt.out | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index 74f4a990d4cf..bad68177d1c2 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -797,8 +797,8 @@ explain (costs off) select * from t_hashdist where a in (select random()::int fr -> Redistribute Motion 1:3 (slice2; segments: 1) Hash Key: ((random())::integer) -> HashAggregate - Group Key: ((random())::integer) - -> Seq Scan on t_replicate_volatile + Group Key: (random())::integer + -> Seq Scan on t_replicate_volatile Optimizer: Postgres query optimizer (11 rows) From cec1a6dc010f81957e671d76d4a04159c411b9e6 Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Thu, 24 Apr 2025 14:22:40 +0300 Subject: [PATCH 07/11] Add cdbpath_create_motion_to_outer_query function --- src/backend/cdb/cdbpath.c | 9 +++++++ src/backend/optimizer/plan/planner.c | 26 +++---------------- src/include/cdb/cdbpath.h | 3 +++ src/test/regress/expected/explain_analyze.out | 8 +++--- .../expected/explain_analyze_optimizer.out | 8 +++--- 5 files changed, 23 insertions(+), 31 deletions(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 7ed72f8cb8e4..c0af2402abf6 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -2362,6 +2362,15 @@ try_redistribute(PlannerInfo *root, CdbpathMfjRel *g, CdbpathMfjRel *o, return false; } +Path * +cdbpath_create_motion_to_outer_query(PlannerInfo *root, + Path *subpath) +{ + CdbPathLocus outerQueryLocus; + CdbPathLocus_MakeOuterQuery(&outerQueryLocus); + return (Path *) make_motion_path(root, subpath, outerQueryLocus, false, NULL); +} + /* * Add a suitable Motion Path so that the input tuples from 'subpath' are * distributed correctly for insertion into target table. 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a87974503025..431ac19e0e3b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2621,29 +2621,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, if (CdbPathLocus_IsSingleQE(path->locus)) { - CdbMotionPath *motion_path; - - motion_path = makeNode(CdbMotionPath); - motion_path->path.pathtype = T_Motion; - motion_path->path.parent = path->parent; - motion_path->path.pathtarget = path->pathtarget; - motion_path->path.rows = path->rows; - motion_path->path.parallel_aware = false; - motion_path->path.parallel_safe = path->parallel_safe; - motion_path->path.parallel_workers = path->parallel_workers; - motion_path->path.pathkeys = NIL; - motion_path->subpath = path; - /* Costs, etc, are same as subpath. */ - motion_path->path.startup_cost = path->total_cost; - motion_path->path.total_cost = path->total_cost; - motion_path->path.memory = path->memory; - motion_path->path.motionHazard = path->motionHazard; - /* Motion nodes are never rescannable. 
*/ - motion_path->path.rescannable = false; - CdbPathLocus_MakeOuterQuery(&motion_path->path.locus); - - Path *mpath = (Path *) create_material_path(root, motion_path->path.parent, &motion_path->path); - lfirst(lc) = mpath; + Path *motion_path = cdbpath_create_motion_to_outer_query(root, path); + Path *material_path = (Path *) create_material_path(root, motion_path->parent, motion_path); + lfirst(lc) = material_path; } } set_cheapest(current_rel); diff --git a/src/include/cdb/cdbpath.h b/src/include/cdb/cdbpath.h index ed7e7fe8fe71..82231f8b102e 100644 --- a/src/include/cdb/cdbpath.h +++ b/src/include/cdb/cdbpath.h @@ -40,6 +40,9 @@ extern Path *cdbpath_create_redistribute_motion_path_for_exprs(PlannerInfo *root List *hashExprs, List *hashFamilies); +extern Path *cdbpath_create_motion_to_outer_query(PlannerInfo *root, + Path *subpath); + extern Path *create_motion_path_for_ctas(PlannerInfo *root, GpPolicy *policy, Path *subpath); extern Path *create_motion_path_for_insert(PlannerInfo *root, GpPolicy *targetPolicy, Path *subpath); extern Path *create_motion_path_for_upddel(PlannerInfo *root, Index rti, GpPolicy *targetPolicy, Path *subpath); diff --git a/src/test/regress/expected/explain_analyze.out b/src/test/regress/expected/explain_analyze.out index d1e3fff4bad8..222b72859682 100644 --- a/src/test/regress/expected/explain_analyze.out +++ b/src/test/regress/expected/explain_analyze.out @@ -143,11 +143,11 @@ explain (analyze, timing off, costs off) -> Result (actual rows=1 loops=2) One-Time Filter: ((x.x)::integer = 0) Optimizer: Postgres-based planner - Planning Time: 0.259 ms + Planning Time: 0.245 ms (slice0) Executor memory: 19K bytes. - (slice1) Executor memory: 52K bytes. Work_mem: 17K bytes max. - Memory used: 256000kB - Execution Time: 0.112 ms + (slice1) Executor memory: 36K bytes. Work_mem: 17K bytes max. 
+ Memory used: 128000kB + Execution Time: 0.082 ms (14 rows) -- explain_processing_on diff --git a/src/test/regress/expected/explain_analyze_optimizer.out b/src/test/regress/expected/explain_analyze_optimizer.out index 588df77164c5..7e8ac4c2dd20 100644 --- a/src/test/regress/expected/explain_analyze_optimizer.out +++ b/src/test/regress/expected/explain_analyze_optimizer.out @@ -146,11 +146,11 @@ explain (analyze, timing off, costs off) -> Result (actual rows=1 loops=2) One-Time Filter: ((x.x)::integer = 0) Optimizer: Postgres-based planner - Planning Time: 0.259 ms + Planning Time: 2.920 ms (slice0) Executor memory: 19K bytes. - (slice1) Executor memory: 52K bytes. Work_mem: 17K bytes max. - Memory used: 256000kB - Execution Time: 0.112 ms + (slice1) Executor memory: 36K bytes. Work_mem: 17K bytes max. + Memory used: 128000kB + Execution Time: 0.085 ms (14 rows) -- explain_processing_on From f30f637cb259fa833ba990f5160c0fbe7291d4f8 Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Thu, 24 Apr 2025 17:26:31 +0300 Subject: [PATCH 08/11] Make the condition for adding motion in bring_to_outer_query more strict --- src/backend/optimizer/path/allpaths.c | 6 +++--- src/test/regress/expected/gp_tsrf.out | 5 ++++- src/test/regress/expected/gp_tsrf_optimizer.out | 5 ++++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 83bfcce4b988..e073c53800f6 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -522,9 +522,9 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, List *outer_quals) Path *path; CdbPathLocus outerquery_locus; - if (CdbPathLocus_IsGeneral(origpath->locus) || - CdbPathLocus_IsOuterQuery(origpath->locus) || - contain_volatile_functions((Node *) root->processed_tlist)) + if (CdbPathLocus_IsGeneral(origpath->locus) || CdbPathLocus_IsOuterQuery(origpath->locus) || + 
((CdbPathLocus_IsSegmentGeneral(origpath->locus) || CdbPathLocus_IsSingleQE(origpath->locus)) + && contain_volatile_functions((Node *) root->processed_tlist))) { path = origpath; } diff --git a/src/test/regress/expected/gp_tsrf.out b/src/test/regress/expected/gp_tsrf.out index e156fb4cdd13..c8c94fc134cb 100644 --- a/src/test/regress/expected/gp_tsrf.out +++ b/src/test/regress/expected/gp_tsrf.out @@ -354,7 +354,10 @@ insert into srf_test_t1 values ('ao1_srf_test'::regclass::oid), ('ao2_srf_test': select * from srf_test_t1 where a in (select (gp_toolkit.__gp_aoblkdir(srf_test_t1.a)).row_count from gp_dist_random('gp_id') limit 1); -ERROR: Passing parameters across motion is not supported. (cdbmutate.c:2051) + a +--- +(0 rows) + drop table ao1_srf_test; drop table ao2_srf_test; drop table srf_test_t1; diff --git a/src/test/regress/expected/gp_tsrf_optimizer.out b/src/test/regress/expected/gp_tsrf_optimizer.out index e38a44d043a6..ce7c1147f4ff 100644 --- a/src/test/regress/expected/gp_tsrf_optimizer.out +++ b/src/test/regress/expected/gp_tsrf_optimizer.out @@ -354,7 +354,10 @@ insert into srf_test_t1 values ('ao1_srf_test'::regclass::oid), ('ao2_srf_test': select * from srf_test_t1 where a in (select (gp_toolkit.__gp_aoblkdir(srf_test_t1.a)).row_count from gp_dist_random('gp_id') limit 1); -ERROR: Passing parameters across motion is not supported. 
(cdbmutate.c:2051) + a +--- +(0 rows) + drop table ao1_srf_test; drop table ao2_srf_test; drop table srf_test_t1; From 84cd3824ea245b27f0484eae6fdb6912d25f3ae2 Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Mon, 2 Jun 2025 15:44:43 +0300 Subject: [PATCH 09/11] Add comments --- src/backend/optimizer/path/allpaths.c | 9 +++++++++ src/backend/optimizer/plan/planner.c | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index e073c53800f6..196d07d7cfd0 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -522,6 +522,15 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, List *outer_quals) Path *path; CdbPathLocus outerquery_locus; + /* + * We can change the locus and add Motion here if we need OuterQuery. + * However, if there is a volatile function in TL, we should do this + * later. The reason for this is that the volatile function in this + * case can be in the Result node (for each segment). We want the + * volatile function to be executed once if possible. So, the locus + * change and Motion addition occurs later after the scan/join path + * is generated (see cdbpath_create_motion_to_outer_query()). 
+ */ if (CdbPathLocus_IsGeneral(origpath->locus) || CdbPathLocus_IsOuterQuery(origpath->locus) || ((CdbPathLocus_IsSegmentGeneral(origpath->locus) || CdbPathLocus_IsSingleQE(origpath->locus)) && contain_volatile_functions((Node *) root->processed_tlist))) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 431ac19e0e3b..0a5457e0a38a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2602,6 +2602,12 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target_parallel_safe, scanjoin_target_same_exprs); + /* + * If the TL of the subquery contains a volatile function and the data is available + * on all segments, we should change the path locus to SingleQE in order to get a + * single dataset on all segments. We do not take this into account if the final + * locus is Replicated (this case is processed later). + */ if (contain_volatile_functions((Node *) scanjoin_target->exprs) && !CdbPathLocus_IsReplicated(root->final_locus)) { foreach(lc, current_rel->pathlist) @@ -2613,6 +2619,10 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, } } } + /* + * If the subquery contains parameterized operators (correlated), the locus should be + * changed to OuterQuery. We do it here, instead of bring_to_outer_query(). 
+ */ if (root->is_correlated_subplan && !CdbPathLocus_IsReplicated(root->final_locus)) { foreach(lc, current_rel->pathlist) From 2b22a31d4d64db407db59d49f7ee7d67f1f10f26 Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Mon, 2 Jun 2025 20:04:01 +0300 Subject: [PATCH 10/11] Omit unnecessary Materialize and fix test output --- src/backend/cdb/cdbllize.c | 13 +++++++++++++ src/test/regress/expected/explain_analyze.out | 5 ++--- src/test/regress/expected/subselect.out | 10 ++++------ src/test/regress/expected/with_clause.out | 15 +++++++-------- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 2d086e26dfa7..f6e85a5fe7c9 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -112,6 +112,7 @@ typedef struct decorate_subplans_with_motions_context /* Current position in the tree. */ int sliceDepth; Flow *currentPlanFlow; + bool shouldOmitMaterial; } decorate_subplans_with_motions_context; /* State for the recursive build_slice_table() function. 
*/ @@ -725,6 +726,7 @@ cdbllize_decorate_subplans_with_motions(PlannerInfo *root, Plan *plan) planner_init_plan_tree_base(&context.base, root); context.sliceDepth = 0; context.subplan_workingQueue = NIL; + context.shouldOmitMaterial = false; nsubplans = list_length(root->glob->subplans); context.subplans = (decorate_subplan_info *) @@ -1006,6 +1008,7 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte child->initPlan = list_concat(child->initPlan, motion->plan.initPlan); newnode = (Node *) child; + context->shouldOmitMaterial = true; } else { @@ -1023,6 +1026,16 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte if (plan->flow != NULL && plan->flow->locustype != CdbLocusType_OuterQuery) context->currentPlanFlow = plan->flow; newnode = plan_tree_mutator(node, fix_outer_query_motions_mutator, context, false); + + /* If the underlying node was Motion, then omit Materialize */ + if (IsA(newnode, Material) && context->shouldOmitMaterial) + { + Plan *materialPlan = (Plan *) newnode; + materialPlan->initPlan = list_concat(materialPlan->initPlan, materialPlan->lefttree->initPlan); + materialPlan = materialPlan->lefttree; + newnode = (Node *) materialPlan; + } + context->shouldOmitMaterial = false; context->currentPlanFlow = saveCurrentPlanFlow; } diff --git a/src/test/regress/expected/explain_analyze.out b/src/test/regress/expected/explain_analyze.out index 222b72859682..de9745e81b6c 100644 --- a/src/test/regress/expected/explain_analyze.out +++ b/src/test/regress/expected/explain_analyze.out @@ -139,9 +139,8 @@ explain (analyze, timing off, costs off) InitPlan 2 (returns $1) -> Result (actual rows=1 loops=1) SubPlan 1 - -> Materialize (actual rows=1 loops=2) - -> Result (actual rows=1 loops=2) - One-Time Filter: ((x.x)::integer = 0) + -> Result (actual rows=1 loops=2) + One-Time Filter: ((x.x)::integer = 0) Optimizer: Postgres-based planner Planning Time: 0.245 ms (slice0) Executor memory: 19K bytes. 
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index cb11d6eec040..4d986ef72009 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1061,14 +1061,12 @@ explain (verbose, costs off) -> Values Scan on "*VALUES*" Output: (SubPlan 1) SubPlan 1 - -> Materialize - Output: (random()) - -> Result - Output: random() - One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) + -> Result + Output: random() + One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) Optimizer: Postgres-based planner Settings: optimizer = 'off' -(12 rows) +(10 rows) -- -- Test rescan of a hashed subplan (the use of random() is to prevent the diff --git a/src/test/regress/expected/with_clause.out b/src/test/regress/expected/with_clause.out index 1f6b51dd1c30..4b6db572b865 100644 --- a/src/test/regress/expected/with_clause.out +++ b/src/test/regress/expected/with_clause.out @@ -3833,8 +3833,8 @@ with cte as ( ) select * from t1 where t1.i in (select i from cte where cte.i = t1.j) order by 1; - QUERY PLAN --------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: t1.i -> Sort @@ -3844,13 +3844,12 @@ order by 1; SubPlan 1 -> Result Filter: (cte.i = t1.j) - -> Materialize - -> Subquery Scan on cte - -> Shared Scan (share slice:id 1:0) - -> Insert on with_dml_dr - -> Function Scan on generate_series i + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i Optimizer: Postgres-based planner -(15 rows) +(14 rows) with cte as ( insert into with_dml_dr From 47b8dee03f83651d85059f6c9d22841ec37a09a0 Mon Sep 17 00:00:00 2001 From: Aleksandr Kopytov Date: Mon, 2 Jun 2025 20:21:38 +0300 Subject: [PATCH 11/11] Fix test outputs (remove unnecessary Materialize) --- 
.../expected/explain_analyze_optimizer.out | 5 +- src/test/regress/expected/subselect.out | 40 +++++++++++++-- .../regress/expected/subselect_optimizer.out | 50 +++++++++++++++---- .../expected/with_clause_optimizer.out | 15 +++--- 4 files changed, 84 insertions(+), 26 deletions(-) diff --git a/src/test/regress/expected/explain_analyze_optimizer.out b/src/test/regress/expected/explain_analyze_optimizer.out index 7e8ac4c2dd20..6c8b05aba024 100644 --- a/src/test/regress/expected/explain_analyze_optimizer.out +++ b/src/test/regress/expected/explain_analyze_optimizer.out @@ -142,9 +142,8 @@ explain (analyze, timing off, costs off) InitPlan 2 (returns $1) -> Result (actual rows=1 loops=1) SubPlan 1 - -> Materialize (actual rows=1 loops=2) - -> Result (actual rows=1 loops=2) - One-Time Filter: ((x.x)::integer = 0) + -> Result (actual rows=1 loops=2) + One-Time Filter: ((x.x)::integer = 0) Optimizer: Postgres-based planner Planning Time: 2.920 ms (slice0) Executor memory: 19K bytes. diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 4d986ef72009..112d55bfc3d8 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1054,8 +1054,8 @@ explain (verbose, costs off) explain (verbose, costs off) select x, x from (select (select random() where y=y) as x from (values(1),(2)) v(y)) ss; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Subquery Scan on ss Output: ss.x, ss.x -> Values Scan on "*VALUES*" @@ -1064,9 +1064,8 @@ explain (verbose, costs off) -> Result Output: random() One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) - Optimizer: Postgres-based planner - Settings: optimizer = 'off' -(10 rows) + Optimizer: Postgres query optimizer +(9 rows) -- -- Test rescan of a hashed subplan (the use of random() is to prevent the @@ -2159,3 
+2158,34 @@ drop table tl1; drop table tl2; drop table tl3; drop table tl4; +--Test case for subquery, which returns more than one rows +create table table1 as + select * from (values (1, 0), (1, 0)) v(a, b) distributed by (a); +create table table2 as + select * from (values (0, 10), (0, 10)) v(a, b) distributed by (a); +explain (costs off) +select * from table1 where 10 in + (select b from table2 where table2.a = 0 or table1.b = table2.b); + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on table1 + Filter: (SubPlan 1) + SubPlan 1 + -> Result + Filter: ((table2.a = 0) OR (table1.b = table2.b)) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on table2 + Optimizer: Postgres-based planner +(10 rows) + +select * from table1 where 10 in + (select b from table2 where table2.a = 0 or table1.b = table2.b); + a | b +---+--- + 1 | 0 + 1 | 0 +(2 rows) + +drop table table1, table2; diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index 91e547a6d1ef..7ef99fd89347 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -1104,21 +1104,18 @@ explain (verbose, costs off) explain (verbose, costs off) select x, x from (select (select random() where y=y) as x from (values(1),(2)) v(y)) ss; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Subquery Scan on ss Output: ss.x, ss.x -> Values Scan on "*VALUES*" Output: (SubPlan 1) SubPlan 1 - -> Materialize - Output: (random()) - -> Result - Output: random() - One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) - Optimizer: Postgres-based planner - Settings: optimizer = 'off' -(12 rows) + -> Result + Output: random() + One-Time Filter: 
("*VALUES*".column1 = "*VALUES*".column1) + Optimizer: Postgres query optimizer +(9 rows) -- -- Test rescan of a hashed subplan (the use of random() is to prevent the @@ -2248,3 +2245,36 @@ drop table tl1; drop table tl2; drop table tl3; drop table tl4; +--Test case for subquery, which returns more than one rows +create table table1 as + select * from (values (1, 0), (1, 0)) v(a, b) distributed by (a); +create table table2 as + select * from (values (0, 10), (0, 10)) v(a, b) distributed by (a); +explain (costs off) +select * from table1 where 10 in + (select b from table2 where table2.a = 0 or table1.b = table2.b); + QUERY PLAN +-------------------------------------------------------------- + Result + Filter: (SubPlan 1) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on table1 + SubPlan 1 + -> Result + Filter: ((table2.a = 0) OR (table1.b = table2.b)) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on table2 + Filter: (10 = b) + Optimizer: GPORCA +(12 rows) + +select * from table1 where 10 in + (select b from table2 where table2.a = 0 or table1.b = table2.b); + a | b +---+--- + 1 | 0 + 1 | 0 +(2 rows) + +drop table table1, table2; diff --git a/src/test/regress/expected/with_clause_optimizer.out b/src/test/regress/expected/with_clause_optimizer.out index ed6ce8a5e3a1..63a28162fcf7 100644 --- a/src/test/regress/expected/with_clause_optimizer.out +++ b/src/test/regress/expected/with_clause_optimizer.out @@ -3842,8 +3842,8 @@ with cte as ( ) select * from t1 where t1.i in (select i from cte where cte.i = t1.j) order by 1; - QUERY PLAN --------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: t1.i -> Sort @@ -3853,13 +3853,12 @@ order by 1; SubPlan 1 -> Result Filter: (cte.i = t1.j) - -> Materialize - -> Subquery Scan on cte - -> Shared Scan (share slice:id 
1:0) - -> Insert on with_dml_dr - -> Function Scan on generate_series i + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i Optimizer: Postgres-based planner -(15 rows) +(14 rows) with cte as ( insert into with_dml_dr