Skip to content

Commit 735e7df

Browse files
committed
Convert function calls to cast, deal with dates
1 parent 0e07afb commit 735e7df

File tree

1 file changed

+60
-11
lines changed

1 file changed

+60
-11
lines changed

crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717

1818
use std::vec;
1919

20+
use datafusion::arrow::datatypes::{DataType, TimeUnit};
21+
use datafusion::functions::datetime::to_date::ToDateFunc;
2022
use datafusion::functions::datetime::to_timestamp::ToTimestampFunc;
21-
use datafusion::logical_expr::{Expr, Operator};
23+
use datafusion::logical_expr::{Cast, Expr, Operator};
2224
use datafusion::scalar::ScalarValue;
2325
use iceberg::expr::{BinaryExpression, Predicate, PredicateOperator, Reference, UnaryExpression};
2426
use iceberg::spec::Datum;
@@ -120,19 +122,50 @@ fn to_iceberg_predicate(expr: &Expr) -> TransformedResult {
120122
_ => TransformedResult::NotTransformed,
121123
}
122124
}
123-
Expr::Cast(c) => to_iceberg_predicate(&c.expr),
125+
Expr::Cast(c) => {
126+
if DataType::Date32 == c.data_type || DataType::Date64 == c.data_type {
127+
match c.expr.as_ref() {
128+
Expr::Literal(ScalarValue::Utf8(Some(literal))) => {
129+
let date = literal.split('T').next();
130+
if let Some(date) = date {
131+
return TransformedResult::Literal(Datum::string(date));
132+
}
133+
}
134+
_ => return TransformedResult::NotTransformed,
135+
}
136+
}
137+
to_iceberg_predicate(&c.expr)
138+
}
124139
Expr::ScalarFunction(func) => {
125140
if func
126141
.func
127142
.inner()
128143
.as_any()
129144
.downcast_ref::<ToTimestampFunc>()
130145
.is_some()
146+
// More than 1 argument means it's a custom format - not
147+
// supported for now
131148
&& func.args.len() == 1
149+
{
150+
return to_iceberg_predicate(&Expr::Cast(Cast::new(
151+
Box::new(func.args[0].clone()),
152+
DataType::Timestamp(TimeUnit::Nanosecond, None),
153+
)));
154+
}
155+
if func
156+
.func
157+
.inner()
158+
.as_any()
159+
.downcast_ref::<ToDateFunc>()
160+
.is_some()
132161
// More than 1 argument means it's a custom format - not
133162
// supported for now
163+
&& func.args.len() == 1
134164
{
135-
return to_iceberg_predicate(&func.args[0]);
165+
return to_iceberg_predicate(&Expr::Cast(Cast::new(
166+
Box::new(func.args[0].clone()),
167+
DataType::Date32,
168+
)));
136169
}
137170
TransformedResult::NotTransformed
138171
}
@@ -446,12 +479,28 @@ mod tests {
446479
assert_eq!(predicate, None);
447480
}
448481

449-
//#[test]
450-
//fn test_to_date_comparison_creates_predicate() {
451-
// let sql = "TO_DATE(ts) >= CAST('2023-01-05T00:00:00' AS DATE)";
452-
// let predicate = convert_to_iceberg_predicate(sql).unwrap();
453-
// let expected_predicate =
454-
// Reference::new("ts").greater_than_or_equal_to(Datum::string("2023-01-05"));
455-
// assert_eq!(predicate, expected_predicate);
456-
//}
482+
#[test]
483+
fn test_to_date_comparison_creates_predicate() {
484+
let sql = "ts >= CAST('2023-01-05T11:11:11' AS DATE)";
485+
let predicate = convert_to_iceberg_predicate(sql).unwrap();
486+
let expected_predicate =
487+
Reference::new("ts").greater_than_or_equal_to(Datum::string("2023-01-05"));
488+
assert_eq!(predicate, expected_predicate);
489+
}
490+
491+
#[test]
492+
/// When casting to DATE, usually the value is converted to datetime or timestamp,
493+
/// and then it is truncated. DayTransform is not yet supported fully here.
494+
/// It is specifically implemented for Strings because it is the most common use case.
495+
/// When actual support is implemented, this test will fail and should be removed.
496+
/// For now it is here in order to make sure that the value from within the cast
497+
/// is not used as-is when casting to date, because it can create false predicates.
498+
///
499+
/// (Consider for example `ts > CAST('2023-01-05T11:11:11' AS DATE)` which would
500+
/// create a different predicate than `ts > CAST('2023-01-05T11:11:11' AS TIMESTAMP)`)
501+
fn test_to_date_from_non_string_is_ignored() {
502+
let sql = "ts >= CAST(123456789 AS DATE)";
503+
let predicate = convert_to_iceberg_predicate(sql);
504+
assert_eq!(predicate, None);
505+
}
457506
}

0 commit comments

Comments
 (0)