Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 91a7879

Browse files
authored
fix: limit row cnt (#138)
Change the `LIMIT` output cardinality to `min(child, fetch)`. In the example below, I hard-coded `PhysicalScan`'s cardinality to 10, because the CLI does not support stats yet.

```
❯ create table t1(v1 int);
0 rows in set. Query took 0.011 seconds.
Execution took 0.000 secs, Planning took 0.000 secs
❯ insert into t1 values (0), (1), (2), (3);
❯ explain verbose select * from t1 limit 2;
+--------------------------------------------------+-------------------------------------------------------------------------------------------------+
| plan_type                                        | plan                                                                                            |
+--------------------------------------------------+-------------------------------------------------------------------------------------------------+
| logical_plan after datafusion                    | Limit: skip=0, fetch=2                                                                          |
|                                                  |   Projection: t1.v1                                                                             |
|                                                  |     TableScan: t1                                                                               |
| logical_plan after optd                          | LogicalLimit { skip: 0, fetch: 2 }                                                              |
|                                                  | └── LogicalProjection { exprs: [ #0 ] }                                                         |
|                                                  |     └── LogicalScan { table: t1 }                                                               |
| physical_plan after optd                         | PhysicalLimit { skip: 0, fetch: 2, cost: weighted=3.12,row_cnt=1.00,compute=2.12,io=1.00 }      |
|                                                  | └── PhysicalProjection { exprs: [ #0 ], cost: weighted=1.06,row_cnt=1.00,compute=0.06,io=1.00 } |
|                                                  |     └── PhysicalScan { table: t1, cost: weighted=1.00,row_cnt=1.00,compute=0.00,io=1.00 }       |
| physical_plan after optd-join-order              | t1                                                                                              |
| physical_plan after optd-all-join-orders         | SAME TEXT AS ABOVE                                                                              |
| physical_plan after optd-all-logical-join-orders | SAME TEXT AS ABOVE                                                                              |
| physical_plan                                    | GlobalLimitExec: skip=0, fetch=2                                                                |
|                                                  |   ProjectionExec: expr=[<expr>@0 as col0]                                                       |
|                                                  |     MemoryExec: partitions=1, partition_sizes=[1]                                               |
|                                                  |                                                                                                 |
+--------------------------------------------------+-------------------------------------------------------------------------------------------------+
```
1 parent 41192d4 commit 91a7879

File tree

1 file changed

+36
-3
lines changed

1 file changed

+36
-3
lines changed

optd-datafusion-repr/src/cost/base_cost.rs

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::{collections::HashMap, sync::Arc};
22

3-
use crate::plan_nodes::{BinOpType, ColumnRefExpr, LogOpType, OptRelNode, UnOpType};
3+
use crate::plan_nodes::{
4+
BinOpType, ColumnRefExpr, ConstantExpr, ConstantType, LogOpType, OptRelNode, UnOpType,
5+
};
46
use crate::properties::column_ref::{ColumnRefPropertyBuilder, GroupColumnRefs};
57
use crate::{
68
plan_nodes::{OptRelNodeRef, OptRelNodeTyp},
@@ -392,8 +394,39 @@ impl<M: MostCommonValues, D: Distribution> CostModel<OptRelNodeTyp> for OptCostM
392394
OptRelNodeTyp::PhysicalEmptyRelation => Self::cost(0.5, 0.01, 0.0),
393395
OptRelNodeTyp::PhysicalLimit => {
394396
let (row_cnt, compute_cost, _) = Self::cost_tuple(&children[0]);
395-
let selectivity = 0.001;
396-
Self::cost((row_cnt * selectivity).max(1.0), compute_cost, 0.0)
397+
let row_cnt = if let Some(context) = context {
398+
if let Some(optimizer) = optimizer {
399+
let mut fetch_expr =
400+
optimizer.get_all_group_bindings(context.children_group_ids[2], false);
401+
assert!(
402+
fetch_expr.len() == 1,
403+
"fetch expression should be the only expr in the group"
404+
);
405+
let fetch_expr = fetch_expr.pop().unwrap();
406+
assert!(
407+
matches!(
408+
fetch_expr.typ,
409+
OptRelNodeTyp::Constant(ConstantType::UInt64)
410+
),
411+
"fetch type can only be UInt64"
412+
);
413+
let fetch = ConstantExpr::from_rel_node(fetch_expr)
414+
.unwrap()
415+
.value()
416+
.as_u64();
417+
// u64::MAX represents None
418+
if fetch == u64::MAX {
419+
row_cnt
420+
} else {
421+
row_cnt.min(fetch as f64)
422+
}
423+
} else {
424+
(row_cnt * INVALID_SELECTIVITY).max(1.0)
425+
}
426+
} else {
427+
(row_cnt * INVALID_SELECTIVITY).max(1.0)
428+
};
429+
Self::cost(row_cnt, compute_cost, 0.0)
397430
}
398431
OptRelNodeTyp::PhysicalFilter => {
399432
let (row_cnt, _, _) = Self::cost_tuple(&children[0]);

0 commit comments

Comments
 (0)