|
1 |
| -/* |
2 |
| - * Parseable Server (C) 2022 - 2024 Parseable, Inc. |
3 |
| - * |
4 |
| - * This program is free software: you can redistribute it and/or modify |
5 |
| - * it under the terms of the GNU Affero General Public License as |
6 |
| - * published by the Free Software Foundation, either version 3 of the |
7 |
| - * License, or (at your option) any later version. |
8 |
| - * |
9 |
| - * This program is distributed in the hope that it will be useful, |
10 |
| - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 |
| - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 |
| - * GNU Affero General Public License for more details. |
13 |
| - * |
14 |
| - * You should have received a copy of the GNU Affero General Public License |
15 |
| - * along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 |
| - * |
17 |
| - */ |
18 |
| - |
19 |
| -use std::{collections::HashMap, sync::Arc}; |
20 |
| - |
21 |
| -use datafusion::{ |
22 |
| - common::{DFField, DFSchema}, |
23 |
| - logical_expr::{Filter, LogicalPlan, Projection}, |
24 |
| - optimizer::{optimize_children, OptimizerRule}, |
25 |
| - prelude::{lit, or, Column, Expr}, |
26 |
| - scalar::ScalarValue, |
27 |
| -}; |
28 |
| - |
29 |
| -/// Rewrites logical plan for source using projection and filter |
30 |
| -pub struct FilterOptimizerRule { |
31 |
| - pub column: String, |
32 |
| - pub literals: Vec<String>, |
33 |
| -} |
34 |
| - |
35 |
| -// Try to add filter node on table scan |
36 |
| -// As every table supports projection push down |
37 |
| -// we try to add the projection for the column directly to the table |
38 |
| -// To preserve the original projection we must add a projection node with the original projection |
39 |
| -impl OptimizerRule for FilterOptimizerRule { |
40 |
| - fn try_optimize( |
41 |
| - &self, |
42 |
| - plan: &datafusion::logical_expr::LogicalPlan, |
43 |
| - config: &dyn datafusion::optimizer::OptimizerConfig, |
44 |
| - ) -> datafusion::error::Result<Option<datafusion::logical_expr::LogicalPlan>> { |
45 |
| - // if there are no patterns then the rule cannot be performed |
46 |
| - let Some(filter_expr) = self.expr() else { |
47 |
| - return Ok(None); |
48 |
| - }; |
49 |
| - |
50 |
| - if let LogicalPlan::Filter(filter) = plan { |
51 |
| - if filter.predicate == filter_expr { |
52 |
| - return Ok(None); |
53 |
| - } |
54 |
| - } |
55 |
| - |
56 |
| - if let LogicalPlan::TableScan(table) = plan { |
57 |
| - if table.projection.is_none() |
58 |
| - || table |
59 |
| - .filters |
60 |
| - .iter() |
61 |
| - .any(|expr| self.contains_valid_tag_filter(expr)) |
62 |
| - { |
63 |
| - return Ok(None); |
64 |
| - } |
65 |
| - |
66 |
| - let mut table = table.clone(); |
67 |
| - let schema = &table.source.schema(); |
68 |
| - let orignal_projection = table.projected_schema.clone(); |
69 |
| - |
70 |
| - // add filtered column projection to table |
71 |
| - if !table |
72 |
| - .projected_schema |
73 |
| - .has_column_with_unqualified_name(&self.column) |
74 |
| - { |
75 |
| - let tags_index = schema.index_of(&self.column)?; |
76 |
| - let tags_field = schema.field(tags_index); |
77 |
| - // modify source table projection to include tags |
78 |
| - let mut df_schema = table.projected_schema.fields().clone(); |
79 |
| - df_schema.push(DFField::new( |
80 |
| - Some(table.table_name.clone()), |
81 |
| - tags_field.name(), |
82 |
| - tags_field.data_type().clone(), |
83 |
| - tags_field.is_nullable(), |
84 |
| - )); |
85 |
| - |
86 |
| - table.projected_schema = |
87 |
| - Arc::new(DFSchema::new_with_metadata(df_schema, HashMap::default())?); |
88 |
| - if let Some(projection) = &mut table.projection { |
89 |
| - projection.push(tags_index) |
90 |
| - } |
91 |
| - } |
92 |
| - |
93 |
| - let filter = LogicalPlan::Filter(Filter::try_new( |
94 |
| - filter_expr, |
95 |
| - Arc::new(LogicalPlan::TableScan(table)), |
96 |
| - )?); |
97 |
| - let plan = LogicalPlan::Projection(Projection::new_from_schema( |
98 |
| - Arc::new(filter), |
99 |
| - orignal_projection, |
100 |
| - )); |
101 |
| - |
102 |
| - return Ok(Some(plan)); |
103 |
| - } |
104 |
| - |
105 |
| - // If we didn't find anything then recurse as normal and build the result. |
106 |
| - optimize_children(self, plan, config) |
107 |
| - } |
108 |
| - |
109 |
| - fn name(&self) -> &str { |
110 |
| - "parseable_read_filter" |
111 |
| - } |
112 |
| -} |
113 |
| - |
114 |
| -impl FilterOptimizerRule { |
115 |
| - fn expr(&self) -> Option<Expr> { |
116 |
| - let mut patterns = self.literals.iter().map(|literal| { |
117 |
| - Expr::Column(Column::from_name(&self.column)).like(lit(format!("%{}%", literal))) |
118 |
| - }); |
119 |
| - |
120 |
| - let mut filter_expr = patterns.next()?; |
121 |
| - for expr in patterns { |
122 |
| - filter_expr = or(filter_expr, expr) |
123 |
| - } |
124 |
| - |
125 |
| - Some(filter_expr) |
126 |
| - } |
127 |
| - |
128 |
| - fn contains_valid_tag_filter(&self, expr: &Expr) -> bool { |
129 |
| - match expr { |
130 |
| - Expr::Like(like) => { |
131 |
| - let matches_column = match &*like.expr { |
132 |
| - Expr::Column(column) => column.name == self.column, |
133 |
| - _ => return false, |
134 |
| - }; |
135 |
| - |
136 |
| - let matches_pattern = match &*like.pattern { |
137 |
| - Expr::Literal(ScalarValue::Utf8(Some(literal))) => { |
138 |
| - let literal = literal.trim_matches('%'); |
139 |
| - self.literals.iter().any(|x| x == literal) |
140 |
| - } |
141 |
| - _ => false, |
142 |
| - }; |
143 |
| - |
144 |
| - matches_column && matches_pattern && !like.negated |
145 |
| - } |
146 |
| - _ => false, |
147 |
| - } |
148 |
| - } |
149 |
| -} |
| 1 | +// /* |
| 2 | +// * Parseable Server (C) 2022 - 2024 Parseable, Inc. |
| 3 | +// * |
| 4 | +// * This program is free software: you can redistribute it and/or modify |
| 5 | +// * it under the terms of the GNU Affero General Public License as |
| 6 | +// * published by the Free Software Foundation, either version 3 of the |
| 7 | +// * License, or (at your option) any later version. |
| 8 | +// * |
| 9 | +// * This program is distributed in the hope that it will be useful, |
| 10 | +// * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | +// * GNU Affero General Public License for more details. |
| 13 | +// * |
| 14 | +// * You should have received a copy of the GNU Affero General Public License |
| 15 | +// * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | +// * |
| 17 | +// */ |
| 18 | +// use std::{collections::HashMap, sync::Arc}; |
| 19 | + |
| 20 | +// use arrow_schema::Field; |
| 21 | +// use datafusion::{ |
| 22 | +// common::DFSchema, |
| 23 | +// logical_expr::{Filter, LogicalPlan, Projection}, |
| 24 | +// optimizer::{optimize_children, OptimizerRule}, |
| 25 | +// prelude::{lit, or, Column, Expr}, |
| 26 | +// scalar::ScalarValue, |
| 27 | +// }; |
| 28 | + |
| 29 | +// /// Rewrites logical plan for source using projection and filter |
| 30 | +// pub struct FilterOptimizerRule { |
| 31 | +// pub column: String, |
| 32 | +// pub literals: Vec<String>, |
| 33 | +// } |
| 34 | + |
| 35 | +// // Try to add filter node on table scan |
| 36 | +// // As every table supports projection push down |
| 37 | +// // we try to add the projection for the column directly to the table
| 38 | +// // To preserve the original projection we must add a projection node with the original projection
| 39 | +// impl OptimizerRule for FilterOptimizerRule { |
| 40 | +// fn try_optimize( |
| 41 | +// &self, |
| 42 | +// plan: &datafusion::logical_expr::LogicalPlan, |
| 43 | +// config: &dyn datafusion::optimizer::OptimizerConfig, |
| 44 | +// ) -> datafusion::error::Result<Option<datafusion::logical_expr::LogicalPlan>> { |
| 45 | +// // if there are no patterns then the rule cannot be performed |
| 46 | +// let Some(filter_expr) = self.expr() else { |
| 47 | +// return Ok(None); |
| 48 | +// }; |
| 49 | + |
| 50 | +// if let LogicalPlan::Filter(filter) = plan { |
| 51 | +// if filter.predicate == filter_expr { |
| 52 | +// return Ok(None); |
| 53 | +// } |
| 54 | +// } |
| 55 | + |
| 56 | +// if let LogicalPlan::TableScan(table) = plan { |
| 57 | +// if table.projection.is_none() |
| 58 | +// || table |
| 59 | +// .filters |
| 60 | +// .iter() |
| 61 | +// .any(|expr| self.contains_valid_tag_filter(expr)) |
| 62 | +// { |
| 63 | +// return Ok(None); |
| 64 | +// } |
| 65 | + |
| 66 | +// let mut table = table.clone(); |
| 67 | +// let schema = &table.source.schema(); |
| 68 | +// let orignal_projection = table.projected_schema.clone(); |
| 69 | + |
| 70 | +// // add filtered column projection to table |
| 71 | +// if !table |
| 72 | +// .projected_schema |
| 73 | +// .has_column_with_unqualified_name(&self.column) |
| 74 | +// { |
| 75 | +// let tags_index = schema.index_of(&self.column)?; |
| 76 | +// let tags_field = schema.field(tags_index); |
| 77 | +// // modify source table projection to include tags |
| 78 | +// let df_schema = table.projected_schema.fields().clone(); |
| 79 | + |
| 80 | +// // from datafusion 37.1.0 -> 40.0.0 |
| 81 | +// // `DFField` has been removed |
| 82 | +// // `DFSchema.new_with_metadata()` has changed |
| 83 | +// // it requires `qualified_fields`(`Vec<(Option<TableReference>, Arc<Field>)>`) instead of `fields` |
| 84 | +// // hence, use `DFSchema::from_unqualified_fields()` for relatively unchanged code |
| 85 | + |
| 86 | +// df_schema.to_vec().push(Arc::new(Field::new( |
| 87 | +// tags_field.name(), |
| 88 | +// tags_field.data_type().clone(), |
| 89 | +// tags_field.is_nullable(), |
| 90 | +// ))); |
| 91 | + |
| 92 | +// table.projected_schema = |
| 93 | +// Arc::new(DFSchema::from_unqualified_fields(df_schema, HashMap::default())?); |
| 94 | +// if let Some(projection) = &mut table.projection { |
| 95 | +// projection.push(tags_index) |
| 96 | +// } |
| 97 | +// } |
| 98 | + |
| 99 | +// let filter = LogicalPlan::Filter(Filter::try_new( |
| 100 | +// filter_expr, |
| 101 | +// Arc::new(LogicalPlan::TableScan(table)), |
| 102 | +// )?); |
| 103 | +// let plan = LogicalPlan::Projection(Projection::new_from_schema( |
| 104 | +// Arc::new(filter), |
| 105 | +// orignal_projection, |
| 106 | +// )); |
| 107 | + |
| 108 | +// return Ok(Some(plan)); |
| 109 | +// } |
| 110 | + |
| 111 | +// // If we didn't find anything then recurse as normal and build the result. |
| 112 | + |
| 113 | +// // TODO: replace `optimize_children()` since it will be removed |
| 114 | +// // But it is not being used anywhere, so might as well just let it be for now |
| 115 | +// optimize_children(self, plan, config) |
| 116 | +// } |
| 117 | + |
| 118 | +// fn name(&self) -> &str { |
| 119 | +// "parseable_read_filter" |
| 120 | +// } |
| 121 | +// } |
| 122 | + |
| 123 | +// impl FilterOptimizerRule { |
| 124 | +// fn expr(&self) -> Option<Expr> { |
| 125 | +// let mut patterns = self.literals.iter().map(|literal| { |
| 126 | +// Expr::Column(Column::from_name(&self.column)).like(lit(format!("%{}%", literal))) |
| 127 | +// }); |
| 128 | + |
| 129 | +// let mut filter_expr = patterns.next()?; |
| 130 | +// for expr in patterns { |
| 131 | +// filter_expr = or(filter_expr, expr) |
| 132 | +// } |
| 133 | + |
| 134 | +// Some(filter_expr) |
| 135 | +// } |
| 136 | + |
| 137 | +// fn contains_valid_tag_filter(&self, expr: &Expr) -> bool { |
| 138 | +// match expr { |
| 139 | +// Expr::Like(like) => { |
| 140 | +// let matches_column = match &*like.expr { |
| 141 | +// Expr::Column(column) => column.name == self.column, |
| 142 | +// _ => return false, |
| 143 | +// }; |
| 144 | + |
| 145 | +// let matches_pattern = match &*like.pattern { |
| 146 | +// Expr::Literal(ScalarValue::Utf8(Some(literal))) => { |
| 147 | +// let literal = literal.trim_matches('%'); |
| 148 | +// self.literals.iter().any(|x| x == literal) |
| 149 | +// } |
| 150 | +// _ => false, |
| 151 | +// }; |
| 152 | + |
| 153 | +// matches_column && matches_pattern && !like.negated |
| 154 | +// } |
| 155 | +// _ => false, |
| 156 | +// } |
| 157 | +// } |
| 158 | +// } |
0 commit comments