diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 3e87310bafe16..d31346d1a8ff0 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -24,8 +24,13 @@ jobs: id: lychee uses: lycheeverse/lychee-action@v1.8.0 with: - args: "--base . --cache --max-cache-age 1d . --exclude 'https://github.com/databendlabs/databend/issues/' --exclude 'https?://twitter\\.com(?:/.*$)?$'" - + args: >- + --base . + --cache + --max-cache-age 1d . + --exclude 'https://github\.com/datafuselabs/databend/issues/.*' + --exclude 'https?://github\.com/databendlabs/databend/issues/.*' + --exclude 'https?://twitter\.com(?:/.*$)?$' - name: Save lychee cache uses: actions/cache/save@v3 if: always() diff --git a/README.md b/README.md index 30c53463f0488..ddbdea935c9f9 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Databend Serverless Cloud (beta) | Documentation | Benchmarking | - Roadmap (v1.3) + Roadmap (v1.3) @@ -22,7 +22,7 @@
- + CI Status @@ -35,11 +35,11 @@ -databend +databend ## 🐋 Introduction -**Databend**, built in Rust, is an open-source cloud data warehouse that serves as a cost-effective [alternative to Snowflake](https://github.com/datafuselabs/databend/issues/13059). With its focus on fast query execution and data ingestion, it's designed for complex analysis of the world's largest datasets. +**Databend**, built in Rust, is an open-source cloud data warehouse that serves as a cost-effective [alternative to Snowflake](https://github.com/databendlabs/databend/issues/13059). With its focus on fast query execution and data ingestion, it's designed for complex analysis of the world's largest datasets. **Production-Proven Scale:** - 🤝 **Enterprise Adoption**: Trusted by over **50 organizations** processing more than **100 million queries daily** @@ -53,7 +53,7 @@ -![Databend vs. Snowflake](https://github.com/datafuselabs/wizard/assets/172204/d796acf0-0a66-4b1d-8754-cd2cd1de04c7) +![Databend vs. Snowflake](https://github.com/databendlabs/wizard/assets/172204/d796acf0-0a66-4b1d-8754-cd2cd1de04c7)
@@ -61,7 +61,7 @@
-![Databend vs. Snowflake](https://github.com/datafuselabs/databend/assets/172204/c61d7a40-f6fe-4fb9-83e8-06ea9599aeb4) +![Databend vs. Snowflake](https://github.com/databendlabs/databend/assets/172204/c61d7a40-f6fe-4fb9-83e8-06ea9599aeb4) ## 🚀 Why Databend @@ -90,7 +90,7 @@ ## 📐 Architecture -![Databend Architecture](https://github.com/datafuselabs/databend/assets/172204/68b1adc6-0ec1-41d4-9e1d-37b80ce0e5ef) +![Databend Architecture](https://github.com/databendlabs/databend/assets/172204/68b1adc6-0ec1-41d4-9e1d-37b80ce0e5ef) ## 🚀 Try Databend @@ -280,7 +280,7 @@ Here are some resources to help you get started: For guidance on using Databend, we recommend starting with the official documentation. If you need further assistance, explore the following community channels: - [Slack](https://link.databend.com/join-slack) (For live discussion with the Community) -- [GitHub](https://github.com/datafuselabs/databend) (Feature/Bug reports, Contributions) +- [GitHub](https://github.com/databendlabs/databend) (Feature/Bug reports, Contributions) - [Twitter](https://twitter.com/DatabendLabs/) (Get the news fast) - [I'm feeling lucky](https://link.databend.com/i-m-feeling-lucky) (Pick up a good first issue now!) @@ -288,7 +288,7 @@ For guidance on using Databend, we recommend starting with the official document Stay updated with Databend's development journey. Here are our roadmap milestones: -- [Roadmap 2025](https://github.com/datafuselabs/databend/issues/14167) +- [Roadmap 2025](https://github.com/databendlabs/databend/issues/14167) ## 📜 License diff --git a/src/query/sql/src/planner/binder/bind_context.rs b/src/query/sql/src/planner/binder/bind_context.rs index 4acf198825e38..8bae80816e816 100644 --- a/src/query/sql/src/planner/binder/bind_context.rs +++ b/src/query/sql/src/planner/binder/bind_context.rs @@ -216,6 +216,12 @@ impl CteContext { pub fn set_cte_context(&mut self, cte_context: CteContext) { self.cte_map = cte_context.cte_map; } + + // Set cte context to current `BindContext`. + pub fn set_cte_context_and_name(&mut self, cte_context: CteContext) { + self.cte_map = cte_context.cte_map; + self.cte_name = cte_context.cte_name; + } } #[derive(Clone, Debug)] @@ -252,9 +258,31 @@ impl BindContext { } } - pub fn with_parent(parent: Box) -> Self { - BindContext { - parent: Some(parent.clone()), + pub fn depth(&self) -> usize { + if let Some(ref p) = self.parent { + return p.depth() + 1; + } + 1 + } + + pub fn with_opt_parent(parent: Option<&BindContext>) -> Result { + if let Some(p) = parent { + Self::with_parent(p.clone()) + } else { + Self::with_parent(Self::new()) + } + } + + pub fn with_parent(parent: BindContext) -> Result { + const MAX_DEPTH: usize = 4096; + if parent.depth() >= MAX_DEPTH { + return Err(ErrorCode::Internal( + "Query binder exceeds the maximum iterations", + )); + } + + Ok(BindContext { + parent: Some(Box::new(parent.clone())), columns: vec![], bound_internal_columns: BTreeMap::new(), aggregate_info: Default::default(), @@ -273,7 +301,7 @@ impl BindContext { expr_context: ExprContext::default(), planning_agg_index: false, window_definitions: DashMap::new(), - } + }) } /// Create a new BindContext with self's parent as its parent diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_subquery.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_subquery.rs index 16c1c5810d7c8..1ef3f38047b70 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_subquery.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_subquery.rs @@ -32,18 +32,14 @@ impl Binder { // If the subquery is a lateral subquery, we need to let it see the columns // from the previous queries. let (result, mut result_bind_context) = if lateral { - let mut new_bind_context = BindContext::with_parent(Box::new(bind_context.clone())); + let mut new_bind_context = BindContext::with_parent(bind_context.clone())?; self.bind_query(&mut new_bind_context, subquery)? } else { - let mut new_bind_context = BindContext::with_parent( - bind_context - .parent - .clone() - .unwrap_or_else(|| Box::new(BindContext::new())), - ); + let mut new_bind_context = + BindContext::with_opt_parent(bind_context.parent.as_ref().map(|c| c.as_ref()))?; new_bind_context .cte_context - .set_cte_context(bind_context.cte_context.clone()); + .set_cte_context_and_name(bind_context.cte_context.clone()); self.bind_query(&mut new_bind_context, subquery)? }; diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs index 1e36d39c25758..32a17a0512efa 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs @@ -200,7 +200,7 @@ impl Binder { self.ctx .add_streams_ref(&catalog, &database, &table_name, consume); } - let mut new_bind_context = BindContext::with_parent(Box::new(bind_context.clone())); + let mut new_bind_context = BindContext::with_parent(bind_context.clone())?; let tokens = tokenize_sql(query.as_str())?; let (stmt, _) = parse_sql(&tokens, self.dialect)?; let Statement::Query(query) = &stmt else { @@ -246,7 +246,7 @@ impl Binder { let tokens = tokenize_sql(query.as_str())?; let (stmt, _) = parse_sql(&tokens, self.dialect)?; // For view, we need use a new context to bind it. - let mut new_bind_context = BindContext::with_parent(Box::new(bind_context.clone())); + let mut new_bind_context = BindContext::with_parent(bind_context.clone())?; new_bind_context.view_info = Some((database.clone(), table_name)); if let Statement::Query(query) = &stmt { self.metadata.write().add_table( diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs index df7ab473deb91..09c4cfadf3c56 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs @@ -324,7 +324,7 @@ impl Binder { alias, .. } => { - let mut bind_context = BindContext::with_parent(Box::new(parent_context.clone())); + let mut bind_context = BindContext::with_parent(parent_context.clone())?; let func_name = normalize_identifier(name, &self.name_resolution_ctx); if BUILTIN_FUNCTIONS diff --git a/src/query/sql/src/planner/binder/ddl/index.rs b/src/query/sql/src/planner/binder/ddl/index.rs index 55a9156c1ee8c..3163fc7f97ce1 100644 --- a/src/query/sql/src/planner/binder/ddl/index.rs +++ b/src/query/sql/src/planner/binder/ddl/index.rs @@ -166,8 +166,7 @@ impl Binder { for (index_id, _, index_meta) in indexes { let tokens = tokenize_sql(&index_meta.query)?; let (stmt, _) = parse_sql(&tokens, self.dialect)?; - let mut new_bind_context = - BindContext::with_parent(Box::new(bind_context.clone())); + let mut new_bind_context = BindContext::with_parent(bind_context.clone())?; new_bind_context.planning_agg_index = true; if let Statement::Query(query) = &stmt { let (s_expr, _) = self.bind_query(&mut new_bind_context, query)?; diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index ccc6c7601d5f0..863d316f13ad6 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -96,7 +96,7 @@ impl Binder { } } } - let bind_context = BindContext::with_parent(Box::new(bind_context.clone())); + let bind_context = BindContext::with_parent(bind_context.clone())?; Ok(( SExpr::create_leaf(Arc::new(DummyTableScan.into())), bind_context, @@ -162,15 +162,14 @@ impl Binder { cte_info: &CteInfo, ) -> Result<(SExpr, BindContext)> { if let Some(cte_name) = &bind_context.cte_context.cte_name { - // `cte_name` exists, which means the current cte is a nested cte - // If the `cte_name` is the same as the current cte's name, it means the cte is recursive if cte_name == table_name { - return Err(ErrorCode::SemanticError( - "The cte is not recursive, but it references itself.".to_string(), - ) + return Err(ErrorCode::SemanticError(format!( + "The cte {table_name} is not recursive, but it references itself.", + )) .set_span(span)); } } + let mut new_bind_context = BindContext { parent: Some(Box::new(bind_context.clone())), bound_internal_columns: BTreeMap::new(), @@ -236,7 +235,7 @@ impl Binder { cte_name: &str, alias: &Option, ) -> Result<(SExpr, BindContext)> { - let mut new_bind_ctx = BindContext::with_parent(Box::new(bind_context.clone())); + let mut new_bind_ctx = BindContext::with_parent(bind_context.clone())?; let mut metadata = self.metadata.write(); let mut columns = cte_info.columns.clone(); for (index, column_name) in cte_info.columns_alias.iter().enumerate() { @@ -357,7 +356,7 @@ impl Binder { change_type: Option, sample: &Option, ) -> Result<(SExpr, BindContext)> { - let mut bind_context = BindContext::with_parent(Box::new(bind_context.clone())); + let mut bind_context = BindContext::with_parent(bind_context.clone())?; let table = self.metadata.read().table(table_index).clone(); let table_name = table.name(); diff --git a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs index c208878c78fda..bff72ef629d02 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs @@ -71,9 +71,14 @@ impl SubqueryRewriter { let mut metadata = self.metadata.write(); // Currently, we don't support left plan's from clause contains subquery. // Such as: select t2.a from (select a + 1 as a from t) as t2 where (select sum(a) from t as t1 where t1.a < t2.a) = 1; - let table_index = metadata - .table_index_by_column_indexes(correlated_columns) - .unwrap(); + let table_index = + match metadata.table_index_by_column_indexes(correlated_columns) { + Some(index) => index, + None => return Err(ErrorCode::SemanticError( + "Join left plan's from clause can't contain subquery to dcorrelated join right plan", + )), + }; + let mut data_types = Vec::with_capacity(correlated_columns.len()); let mut scalar_items = vec![]; let mut scan_columns = ColumnSet::new(); @@ -231,7 +236,7 @@ impl SubqueryRewriter { self.flatten_expression_scan(plan, scan, correlated_columns) } - _ => Err(ErrorCode::Internal( + _ => Err(ErrorCode::SemanticError( "Invalid plan type for flattening subquery", )), } @@ -694,7 +699,7 @@ impl SubqueryRewriter { .iter() .any(|index| correlated_columns.contains(index)) { - return Err(ErrorCode::Internal( + return Err(ErrorCode::SemanticError( "correlated columns in window functions not supported", )); } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index b918f04b0446b..d25303c90a52b 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -3134,11 +3134,11 @@ impl<'a> TypeChecker<'a> { ); // Create new `BindContext` with current `bind_context` as its parent, so we can resolve outer columns. - let mut bind_context = BindContext::with_parent(Box::new(self.bind_context.clone())); + let mut bind_context = BindContext::with_parent(self.bind_context.clone())?; let (s_expr, output_context) = binder.bind_query(&mut bind_context, subquery)?; self.bind_context .cte_context - .set_cte_context(output_context.cte_context); + .set_cte_context_and_name(output_context.cte_context); if (typ == SubqueryType::Scalar || typ == SubqueryType::Any) && output_context.columns.len() > 1 @@ -4644,7 +4644,7 @@ impl<'a> TypeChecker<'a> { expr: &Expr, list: &[Expr], ) -> Result> { - let mut bind_context = BindContext::with_parent(Box::new(self.bind_context.clone())); + let mut bind_context = BindContext::with_parent(self.bind_context.clone())?; let mut values = Vec::with_capacity(list.len()); for val in list.iter() { values.push(vec![val.clone()]) diff --git a/tests/sqllogictests/suites/query/cte/cte.test b/tests/sqllogictests/suites/query/cte/cte.test index 6d6b9cef27b9b..ce256305046db 100644 --- a/tests/sqllogictests/suites/query/cte/cte.test +++ b/tests/sqllogictests/suites/query/cte/cte.test @@ -483,6 +483,9 @@ CREATE OR REPLACE TABLE sales ( net_paid DECIMAL(10, 2) NOT NULL ) row_per_block=51113; +query error +WITH t4(x) AS (select x + 3 from (select * from t4) where x < 10) SELECT * FROM t4; + query error WITH InitialSales AS ( SELECT diff --git a/tests/sqllogictests/suites/query/subquery.test b/tests/sqllogictests/suites/query/subquery.test index dfd553e964797..1e4d92399e2de 100644 --- a/tests/sqllogictests/suites/query/subquery.test +++ b/tests/sqllogictests/suites/query/subquery.test @@ -7,6 +7,10 @@ DROP TABLE IF EXISTS c statement ok DROP TABLE IF EXISTS o + +query error 1065 +SELECT * FROM (SELECT 1 AS x) AS ss1 LEFT OUTER JOIN (SELECT 2 DIV 228 AS y) AS ss2 ON TRUE, LATERAL (SELECT ss2.y AS z LIMIT 1) AS ss3 + statement ok CREATE TABLE c (c_id INT NULL, bill VARCHAR NULL) @@ -817,7 +821,7 @@ CREATE TABLE `merge_log` ( `created_at` TIMESTAMP NULL ) ENGINE = FUSE; -query +query SELECT (SELECT MAX(created_at) FROM merge_log @@ -851,7 +855,7 @@ statement ok insert into test_group values ('2024-01-01', 100),('2024-01-01', 200),('2024-01-02', 400),('2024-01-02', 800); query TI -select input_date, sum(value), +select input_date, sum(value), (select sum(value) from test_group b where b.input_date >= DATE_TRUNC(YEAR, input_date) diff --git a/tests/suites/0_stateless/05_hints/05_0001_set_var.result b/tests/suites/0_stateless/05_hints/05_0001_set_var.result index b28b974434947..f637fe754b465 100644 --- a/tests/suites/0_stateless/05_hints/05_0001_set_var.result +++ b/tests/suites/0_stateless/05_hints/05_0001_set_var.result @@ -21,7 +21,7 @@ storage_read_buffer_size 1048576 America/Toronto America/Toronto 1 -2022-02-02 03:00:00 -2022-02-02 03:00:00 +2022-02-02 03:00:00.000000 +2022-02-02 03:00:00.000000 1 13 Asia/Shanghai