|
17 | 17 |
|
18 | 18 | use std::vec;
|
19 | 19 |
|
| 20 | +use datafusion::arrow::datatypes::{DataType, TimeUnit}; |
| 21 | +use datafusion::functions::datetime::to_date::ToDateFunc; |
20 | 22 | use datafusion::functions::datetime::to_timestamp::ToTimestampFunc;
|
21 |
| -use datafusion::logical_expr::{Expr, Operator}; |
| 23 | +use datafusion::logical_expr::{Cast, Expr, Operator}; |
22 | 24 | use datafusion::scalar::ScalarValue;
|
23 | 25 | use iceberg::expr::{BinaryExpression, Predicate, PredicateOperator, Reference, UnaryExpression};
|
24 | 26 | use iceberg::spec::Datum;
|
@@ -120,19 +122,50 @@ fn to_iceberg_predicate(expr: &Expr) -> TransformedResult {
|
120 | 122 | _ => TransformedResult::NotTransformed,
|
121 | 123 | }
|
122 | 124 | }
|
123 |
| - Expr::Cast(c) => to_iceberg_predicate(&c.expr), |
| 125 | + Expr::Cast(c) => { |
| 126 | + if DataType::Date32 == c.data_type || DataType::Date64 == c.data_type { |
| 127 | + match c.expr.as_ref() { |
| 128 | + Expr::Literal(ScalarValue::Utf8(Some(literal))) => { |
| 129 | + let date = literal.split('T').next(); |
| 130 | + if let Some(date) = date { |
| 131 | + return TransformedResult::Literal(Datum::string(date)); |
| 132 | + } |
| 133 | + } |
| 134 | + _ => return TransformedResult::NotTransformed, |
| 135 | + } |
| 136 | + } |
| 137 | + to_iceberg_predicate(&c.expr) |
| 138 | + } |
124 | 139 | Expr::ScalarFunction(func) => {
|
125 | 140 | if func
|
126 | 141 | .func
|
127 | 142 | .inner()
|
128 | 143 | .as_any()
|
129 | 144 | .downcast_ref::<ToTimestampFunc>()
|
130 | 145 | .is_some()
|
| 146 | + // More than 1 argument means it's a custom format - not |
| 147 | + // supported for now |
131 | 148 | && func.args.len() == 1
|
| 149 | + { |
| 150 | + return to_iceberg_predicate(&Expr::Cast(Cast::new( |
| 151 | + Box::new(func.args[0].clone()), |
| 152 | + DataType::Timestamp(TimeUnit::Nanosecond, None), |
| 153 | + ))); |
| 154 | + } |
| 155 | + if func |
| 156 | + .func |
| 157 | + .inner() |
| 158 | + .as_any() |
| 159 | + .downcast_ref::<ToDateFunc>() |
| 160 | + .is_some() |
132 | 161 | // More than 1 argument means it's a custom format - not
|
133 | 162 | // supported for now
|
| 163 | + && func.args.len() == 1 |
134 | 164 | {
|
135 |
| - return to_iceberg_predicate(&func.args[0]); |
| 165 | + return to_iceberg_predicate(&Expr::Cast(Cast::new( |
| 166 | + Box::new(func.args[0].clone()), |
| 167 | + DataType::Date32, |
| 168 | + ))); |
136 | 169 | }
|
137 | 170 | TransformedResult::NotTransformed
|
138 | 171 | }
|
@@ -446,12 +479,28 @@ mod tests {
|
446 | 479 | assert_eq!(predicate, None);
|
447 | 480 | }
|
448 | 481 |
|
449 |
| - //#[test] |
450 |
| - //fn test_to_date_comparison_creates_predicate() { |
451 |
| - // let sql = "TO_DATE(ts) >= CAST('2023-01-05T00:00:00' AS DATE)"; |
452 |
| - // let predicate = convert_to_iceberg_predicate(sql).unwrap(); |
453 |
| - // let expected_predicate = |
454 |
| - // Reference::new("ts").greater_than_or_equal_to(Datum::string("2023-01-05")); |
455 |
| - // assert_eq!(predicate, expected_predicate); |
456 |
| - //} |
| 482 | + #[test] |
| 483 | + fn test_to_date_comparison_creates_predicate() { |
| 484 | + let sql = "ts >= CAST('2023-01-05T11:11:11' AS DATE)"; |
| 485 | + let predicate = convert_to_iceberg_predicate(sql).unwrap(); |
| 486 | + let expected_predicate = |
| 487 | + Reference::new("ts").greater_than_or_equal_to(Datum::string("2023-01-05")); |
| 488 | + assert_eq!(predicate, expected_predicate); |
| 489 | + } |
| 490 | + |
| 491 | + #[test] |
| 492 | + /// When casting to DATE, usually the value is converted to datetime or timestamp, |
| 493 | + /// and then it is truncated. DayTransform is not yet fully supported here. |
| 494 | + /// It is specifically implemented for Strings because it is the most common use case. |
| 495 | + /// When actual support is implemented, this test will fail and should be removed. |
| 496 | + /// For now it is here in order to make sure that the value from within the cast |
| 497 | + /// is not used as-is when casting to date, because it can create false predicates. |
| 498 | + /// |
| 499 | + /// (Consider for example `ts > CAST('2023-01-05T11:11:11' AS DATE)` which would |
| 500 | + /// create a different predicate than `ts > CAST('2023-01-05T11:11:11' AS TIMESTAMP)`) |
| 501 | + fn test_to_date_from_non_string_is_ignored() { |
| 502 | + let sql = "ts >= CAST(123456789 AS DATE)"; |
| 503 | + let predicate = convert_to_iceberg_predicate(sql); |
| 504 | + assert_eq!(predicate, None); |
| 505 | + } |
457 | 506 | }
|
0 commit comments