From 0ff6b0d258828ea46638229318d61e3194d4376d Mon Sep 17 00:00:00 2001 From: Andrei Dragomir Date: Wed, 3 Apr 2024 20:26:10 +0300 Subject: [PATCH] Add nested struct reference capability to datafusion-sql --- datafusion/sql/src/expr/mod.rs | 3 + datafusion/sql/tests/sql_integration.rs | 98 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 0d1db8a29cceb..a4131c152c055 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -937,6 +937,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { planner_context: &mut PlannerContext, ) -> Result { let field = match expr.clone() { + SQLExpr::Identifier(ident) => GetFieldAccess::NamedStructField { + name: ScalarValue::from(ident.value), + }, SQLExpr::Value( Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), ) => GetFieldAccess::NamedStructField { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 319aa5b5fd302..220d41a3c8783 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -52,6 +52,104 @@ fn test_schema_support() { ); } +#[derive(Default)] +struct AdrCtx { + options: ConfigOptions, + udfs: HashMap>, + udafs: HashMap>, +} + +impl AdrCtx { + fn options_mut(&mut self) -> &mut ConfigOptions { + &mut self.options + } + + fn with_udf(mut self, udf: ScalarUDF) -> Self { + self.udfs.insert(udf.name().to_string(), Arc::new(udf)); + self + } +} + +impl ContextProvider for AdrCtx { + fn get_table_source(&self, name: TableReference) -> Result> { + let schema = match name.table() { + "tbl" => Ok(Schema::new(vec![ + Field::new( + "struct_field", + DataType::Struct( + Fields::from(vec![ + Field::new( + "subfield1", + DataType::List( + Arc::new( + Field::new( + "substruct1", + DataType::Struct( + Fields::from(vec![ + Field::new("subsubfield1", DataType::Int32, true), + Field::new("subsubfield2", DataType::Binary, true), + ]) + ), + true + ) + ) + ), + true + ) + ]) + ), + true + ), + ])), + _ => plan_err!("No table named: {} found", name.table()), + }; + match schema { + Ok(t) => Ok(Arc::new(EmptyTable::new(Arc::new(t)))), + Err(e) => Err(e), + } + } + + fn get_function_meta(&self, name: &str) -> Option> { self.udfs.get(name).cloned() } + fn get_aggregate_meta(&self, name: &str) -> Option> { self.udafs.get(name).cloned() } + fn get_variable_type(&self, _: &[String]) -> Option { + unimplemented!() + } + fn get_window_meta(&self, _name: &str) -> Option> { + None + } + fn options(&self) -> &ConfigOptions { + &self.options + } + fn create_cte_work_table(&self, _name: &str, schema: SchemaRef) -> Result> { Ok(Arc::new(EmptyTable::new(schema))) } + fn udfs_names(&self) -> Vec { + self.udfs.keys().cloned().collect() + } + fn udafs_names(&self) -> Vec { + self.udafs.keys().cloned().collect() + } + fn udwfs_names(&self) -> Vec { + Vec::new() + } +} + +#[test] +fn test_adr_1() { + let dialect = &GenericDialect {}; + let sql = r#"SELECT struct_field["subfield1"][0]["subsubfield1"] FROM tbl;"#; + println!("sql: {}", &sql); + let result = DFParser::parse_sql_with_dialect(sql, dialect); + println!("result: {:?}", result); + let mut ast = result.unwrap(); + println!("ast: {:?}", ast); + let context = AdrCtx::default(); + let planner = SqlToRel::new(&context); + let mut parse_result = DFParser::parse_sql_with_dialect(sql, dialect).unwrap(); + let plan_result = planner.statement_to_plan(parse_result.pop_front().unwrap()); + println!("result: {:?}", plan_result); + + // planner.statement_to_plan(ast.pop_front().unwrap()) +} + #[test] fn parse_decimals() { let test_data = [