|
1 | 1 | use std::{collections::HashMap, sync::Arc};
|
2 | 2 |
|
3 |
| -use crate::plan_nodes::{BinOpType, ColumnRefExpr, OptRelNode, UnOpType}; |
| 3 | +use crate::plan_nodes::{BinOpType, ColumnRefExpr, LogOpType, OptRelNode, UnOpType}; |
4 | 4 | use crate::properties::column_ref::{ColumnRefPropertyBuilder, GroupColumnRefs};
|
5 | 5 | use crate::{
|
6 | 6 | plan_nodes::{OptRelNodeRef, OptRelNodeTyp},
|
@@ -298,19 +298,15 @@ impl OptCostModel {
|
298 | 298 | right_child,
|
299 | 299 | column_refs,
|
300 | 300 | )
|
301 |
| - } else if bin_op_typ.is_logical() { |
302 |
| - self.get_logical_bin_op_selectivity( |
303 |
| - bin_op_typ, |
304 |
| - left_child, |
305 |
| - right_child, |
306 |
| - column_refs, |
307 |
| - ) |
308 | 301 | } else if bin_op_typ.is_numerical() {
|
309 | 302 | INVALID_SELECTIVITY
|
310 | 303 | } else {
|
311 | 304 | unreachable!("all BinOpTypes should be true for at least one is_*() function")
|
312 | 305 | }
|
313 | 306 | }
|
| 307 | + OptRelNodeTyp::LogOp(log_op_typ) => { |
| 308 | + self.get_log_op_selectivity(log_op_typ, &expr_tree.children, column_refs) |
| 309 | + } |
314 | 310 | OptRelNodeTyp::UnOp(un_op_typ) => {
|
315 | 311 | assert!(expr_tree.children.len() == 1);
|
316 | 312 | let child = expr_tree.child(0);
|
@@ -532,24 +528,20 @@ impl OptCostModel {
|
532 | 528 | }
|
533 | 529 | }
|
534 | 530 |
|
535 |
| - fn get_logical_bin_op_selectivity( |
| 531 | + fn get_log_op_selectivity( |
536 | 532 | &self,
|
537 |
| - bin_op_typ: BinOpType, |
538 |
| - left: OptRelNodeRef, |
539 |
| - right: OptRelNodeRef, |
| 533 | + log_op_typ: LogOpType, |
| 534 | + children: &[OptRelNodeRef], |
540 | 535 | column_refs: &GroupColumnRefs,
|
541 | 536 | ) -> f64 {
|
542 |
| - assert!(bin_op_typ.is_logical()); |
543 |
| - |
544 |
| - let left_sel = self.get_filter_selectivity(left, column_refs); |
545 |
| - let right_sel = self.get_filter_selectivity(right, column_refs); |
546 |
| - |
547 |
| - match bin_op_typ { |
548 |
| - // note that there's no need to account for nulls here |
549 |
| - // it's also impossible to even account for nulls because we don't know which columns the left and right selectivities are |
550 |
| - BinOpType::And => left_sel * right_sel, |
551 |
| - BinOpType::Or => left_sel + right_sel - left_sel * right_sel, |
552 |
| - _ => unreachable!("we covered all bin_op_typ.is_logical() cases"), |
| 537 | + let children_sel = children |
| 538 | + .iter() |
| 539 | + .map(|expr| self.get_filter_selectivity(expr.clone(), column_refs)); |
| 540 | + |
| 541 | + match log_op_typ { |
| 542 | + LogOpType::And => children_sel.product(), |
| 543 | + // the formula is 1.0 - the probability of _none_ of the events happening |
| 544 | + LogOpType::Or => 1.0 - children_sel.fold(1.0, |acc, sel| acc * (1.0 - sel)), |
553 | 545 | }
|
554 | 546 | }
|
555 | 547 |
|
@@ -595,8 +587,8 @@ mod tests {
|
595 | 587 |
|
596 | 588 | use crate::{
|
597 | 589 | plan_nodes::{
|
598 |
| - BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, Expr, OptRelNode, OptRelNodeRef, |
599 |
| - UnOpExpr, UnOpType, |
| 590 | + BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, Expr, ExprList, LogOpExpr, |
| 591 | + LogOpType, OptRelNode, OptRelNodeRef, UnOpExpr, UnOpType, |
600 | 592 | },
|
601 | 593 | properties::column_ref::ColumnRef,
|
602 | 594 | };
|
@@ -696,6 +688,21 @@ mod tests {
|
696 | 688 | .into_rel_node()
|
697 | 689 | }
|
698 | 690 |
|
| 691 | + fn log_op(op_type: LogOpType, children: Vec<OptRelNodeRef>) -> OptRelNodeRef { |
| 692 | + LogOpExpr::new( |
| 693 | + op_type, |
| 694 | + ExprList::new( |
| 695 | + children |
| 696 | + .into_iter() |
| 697 | + .map(|opt_rel_node_ref| { |
| 698 | + Expr::from_rel_node(opt_rel_node_ref).expect("all children should be Expr") |
| 699 | + }) |
| 700 | + .collect(), |
| 701 | + ), |
| 702 | + ) |
| 703 | + .into_rel_node() |
| 704 | + } |
| 705 | + |
699 | 706 | fn un_op(op_type: UnOpType, child: OptRelNodeRef) -> OptRelNodeRef {
|
700 | 707 | UnOpExpr::new(
|
701 | 708 | Expr::from_rel_node(child).expect("child should be an Expr"),
|
@@ -1136,59 +1143,79 @@ mod tests {
|
1136 | 1143 | fn test_and() {
|
1137 | 1144 | let cost_model = create_one_column_cost_model(PerColumnStats::new(
|
1138 | 1145 | Box::new(MockMostCommonValues {
|
1139 |
| - mcvs: vec![(Value::Int32(1), 0.3), (Value::Int32(5), 0.5)] |
1140 |
| - .into_iter() |
1141 |
| - .collect(), |
| 1146 | + mcvs: vec![ |
| 1147 | + (Value::Int32(1), 0.3), |
| 1148 | + (Value::Int32(5), 0.5), |
| 1149 | + (Value::Int32(8), 0.2), |
| 1150 | + ] |
| 1151 | + .into_iter() |
| 1152 | + .collect(), |
1142 | 1153 | }),
|
1143 | 1154 | 0,
|
1144 | 1155 | 0.0,
|
1145 | 1156 | Box::new(MockDistribution::empty()),
|
1146 | 1157 | ));
|
1147 | 1158 | let eq1 = bin_op(BinOpType::Eq, col_ref(0), cnst(Value::Int32(1)));
|
1148 | 1159 | let eq5 = bin_op(BinOpType::Eq, col_ref(0), cnst(Value::Int32(5)));
|
1149 |
| - let expr_tree = bin_op(BinOpType::And, eq1.clone(), eq5.clone()); |
1150 |
| - let expr_tree_rev = bin_op(BinOpType::And, eq5.clone(), eq1.clone()); |
| 1160 | + let eq8 = bin_op(BinOpType::Eq, col_ref(0), cnst(Value::Int32(8))); |
| 1161 | + let expr_tree = log_op(LogOpType::And, vec![eq1.clone(), eq5.clone(), eq8.clone()]); |
| 1162 | + let expr_tree_shift1 = log_op(LogOpType::And, vec![eq5.clone(), eq8.clone(), eq1.clone()]); |
| 1163 | + let expr_tree_shift2 = log_op(LogOpType::And, vec![eq8.clone(), eq1.clone(), eq5.clone()]); |
1151 | 1164 | let column_refs = vec![ColumnRef::BaseTableColumnRef {
|
1152 | 1165 | table: String::from(TABLE1_NAME),
|
1153 | 1166 | col_idx: 0,
|
1154 | 1167 | }];
|
1155 | 1168 | assert_approx_eq::assert_approx_eq!(
|
1156 | 1169 | cost_model.get_filter_selectivity(expr_tree, &column_refs),
|
1157 |
| - 0.15 |
| 1170 | + 0.03 |
1158 | 1171 | );
|
1159 | 1172 | assert_approx_eq::assert_approx_eq!(
|
1160 |
| - cost_model.get_filter_selectivity(expr_tree_rev, &column_refs), |
1161 |
| - 0.15 |
| 1173 | + cost_model.get_filter_selectivity(expr_tree_shift1, &column_refs), |
| 1174 | + 0.03 |
| 1175 | + ); |
| 1176 | + assert_approx_eq::assert_approx_eq!( |
| 1177 | + cost_model.get_filter_selectivity(expr_tree_shift2, &column_refs), |
| 1178 | + 0.03 |
1162 | 1179 | );
|
1163 | 1180 | }
|
1164 | 1181 |
|
/// A three-way `OR` over equality predicates on column 0 must evaluate to
/// 1 - (1 - 0.3) * (1 - 0.5) * (1 - 0.2) = 0.72, whatever the operand order.
#[test]
fn test_or() {
    // Most-common-value frequencies for the only column: 1 -> 0.3, 5 -> 0.5, 8 -> 0.2.
    let per_column_stats = PerColumnStats::new(
        Box::new(MockMostCommonValues {
            mcvs: vec![
                (Value::Int32(1), 0.3),
                (Value::Int32(5), 0.5),
                (Value::Int32(8), 0.2),
            ]
            .into_iter()
            .collect(),
        }),
        0,
        0.0,
        Box::new(MockDistribution::empty()),
    );
    let cost_model = create_one_column_cost_model(per_column_stats);

    let column_refs = vec![ColumnRef::BaseTableColumnRef {
        table: String::from(TABLE1_NAME),
        col_idx: 0,
    }];

    // Operands start as [eq1, eq5, eq8]; each iteration checks the current order and
    // then rotates left, so [eq5, eq8, eq1] and [eq8, eq1, eq5] are covered as well.
    let mut operands = vec![
        bin_op(BinOpType::Eq, col_ref(0), cnst(Value::Int32(1))),
        bin_op(BinOpType::Eq, col_ref(0), cnst(Value::Int32(5))),
        bin_op(BinOpType::Eq, col_ref(0), cnst(Value::Int32(8))),
    ];
    for _ in 0..3 {
        let expr_tree = log_op(LogOpType::Or, operands.clone());
        assert_approx_eq::assert_approx_eq!(
            cost_model.get_filter_selectivity(expr_tree, &column_refs),
            0.72
        );
        operands.rotate_left(1);
    }
}
|
1194 | 1221 |
|
|
0 commit comments