feat: specify flags in a more flexible way in planner test (#103)

Gun9niR · web-flow · commit 30c76e8aa395 · 2024-03-12T21:00:18.000-04:00
In planner test we might want to specify multiple flags for one task.
For instance, for `explain` we might want `with_logical` and `verbose`.
With the current syntax, however, we need to define multiple tasks for
this: `explain_with_logical_verbose`, `explain_with_logical`,
`explain_verbose`, ... This PR moves such options into square brackets
placed right after the task name, so where we used to have
`explain_with_logical`/`explain_with_logical_verbose` we now have
`explain[with_logical]`, `explain[with_logical, verbose]`.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/optd-perftest/src/datafusion_db_cardtest.rs b/optd-perftest/src/datafusion_db_cardtest.rs
@@ -19,14 +19,14 @@ impl CardtestRunnerDBHelper for DatafusionDb {
 
 // helper functions for ```impl CardtestRunnerDBHelper for DatafusionDb```
 // they can't be put in an impl because DatafusionDb is a foreign struct
-async fn _eval_query_truecard(slf: &DatafusionDb, sql: &str) -> anyhow::Result<usize> {
-    let rows = slf.execute(sql, true).await?;
+async fn _eval_query_truecard(db: &DatafusionDb, sql: &str) -> anyhow::Result<usize> {
+    let rows = db.execute(sql, true).await?;
     let num_rows = rows.len();
     Ok(num_rows)
 }
 
-async fn _eval_query_estcard(slf: &DatafusionDb, _sql: &str) -> anyhow::Result<usize> {
-    let rows = slf.execute("EXPLAIN SELECT * FROM t1;", true).await?;
+async fn _eval_query_estcard(db: &DatafusionDb, _sql: &str) -> anyhow::Result<usize> {
+    let rows = db.execute("EXPLAIN SELECT * FROM t1;", true).await?;
     println!("eval_est_card(): rows={:?}", rows);
     Ok(12)
 }
diff --git a/optd-sqlplannertest/Cargo.toml b/optd-sqlplannertest/Cargo.toml
@@ -36,6 +36,7 @@ optd-datafusion-repr = { path = "../optd-datafusion-repr" }
 tracing-subscriber = "0.3"
 tracing = "0.1"
 itertools = "0.11"
+lazy_static = "1.4.0"
 
 [[test]]
 name = "planner_test"
diff --git a/optd-sqlplannertest/README.md b/optd-sqlplannertest/README.md
@@ -1,20 +1,39 @@
-= Usage
+# Usage
 
-**Update the test cases**
+## Update the test cases
 
 ```shell
 cargo run -p optd-sqlplannertest --bin planner_test_apply
 ```
 
-**Verify the test cases**
+## Verify the test cases
 
 ```shell
 cargo test -p optd-sqlplannertest
 # or use nextest
 cargo nextest run -p optd-sqlplannertest
 ```
 
-The `explain` and `execute` task will be run with datafusion's logical optimizer disabled. To keep using datafusion's logical optimizer, you could use the `execute_with_logical` and `explain_with_logical` tasks instead.
+## Tasks
+
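+The `explain` and `execute` tasks are run with datafusion's logical optimizer disabled by default. Each task accepts optional flags, written in square brackets right after the task name (e.g. `explain[with_logical, verbose]`), to control its behavior.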
+
+### `execute` Task
+
+#### Flags
+
+| Name | Description |
+| -- | -- |
+| with_logical | Enable Datafusion's logical optimizer |
+
+### `explain` Task
+
+#### Flags
+
+| Name | Description |
+| -- | -- |
+| with_logical | Enable Datafusion's logical optimizer |
+| verbose | Display estimated cost in physical plan |
 
 Currently we have the following options for the explain task:
 
@@ -24,4 +43,3 @@ Currently we have the following options for the explain task:
 - `physical_datafusion`: datafusion's physical plan.
 - `join_orders`: physical join orders.
 - `logical_join_orders`: logical join orders.
-
diff --git a/optd-sqlplannertest/src/lib.rs b/optd-sqlplannertest/src/lib.rs
@@ -7,9 +7,11 @@ use datafusion::sql::parser::DFParser;
 use datafusion::sql::sqlparser::dialect::GenericDialect;
 use datafusion_optd_cli::helper::unescape_input;
 use itertools::Itertools;
+use lazy_static::lazy_static;
 use mimalloc::MiMalloc;
 use optd_datafusion_bridge::{DatafusionCatalog, OptdQueryPlanner};
 use optd_datafusion_repr::DatafusionOptimizer;
+use regex::Regex;
 use std::sync::Arc;
 
 #[global_allocator]
@@ -118,8 +120,9 @@ impl DatafusionDb {
     }
 
     /// Executes the `execute` task.
-    async fn task_execute(&mut self, r: &mut String, sql: &str, with_logical: bool) -> Result<()> {
+    async fn task_execute(&mut self, r: &mut String, sql: &str, flags: &[String]) -> Result<()> {
         use std::fmt::Write;
+        let with_logical = flags.contains(&"with_logical".to_string());
         let result = self.execute(sql, with_logical).await?;
         writeln!(r, "{}", result.into_iter().map(|x| x.join(" ")).join("\n"))?;
         writeln!(r)?;
@@ -132,18 +135,17 @@ impl DatafusionDb {
         r: &mut String,
         sql: &str,
         task: &str,
-        with_logical: bool,
+        flags: &[String],
     ) -> Result<()> {
         use std::fmt::Write;
 
+        let with_logical = flags.contains(&"with_logical".to_string());
+        let _verbose = flags.contains(&"verbose".to_string());
+
         let result = self
             .execute(&format!("explain {}", &sql), with_logical)
             .await?;
-        let subtask_start_pos = if with_logical {
-            "explain_with_logical:".len()
-        } else {
-            "explain:".len()
-        };
+        let subtask_start_pos = task.find(':').unwrap() + 1;
         for subtask in task[subtask_start_pos..].split(',') {
             let subtask = subtask.trim();
             if subtask == "logical_datafusion" {
@@ -227,16 +229,33 @@ impl sqlplannertest::PlannerTestRunner for DatafusionDb {
         let mut result = String::new();
         let r = &mut result;
         for task in &test_case.tasks {
-            if task == "execute" {
-                self.task_execute(r, &test_case.sql, false).await?;
-            } else if task == "execute_with_logical" {
-                self.task_execute(r, &test_case.sql, true).await?;
-            } else if task.starts_with("explain:") {
-                self.task_explain(r, &test_case.sql, task, false).await?;
-            } else if task.starts_with("explain_with_logical:") {
-                self.task_explain(r, &test_case.sql, task, true).await?;
+            let flags = extract_flags(task)?;
+            if task.starts_with("execute") {
+                self.task_execute(r, &test_case.sql, &flags).await?;
+            } else if task.starts_with("explain") {
+                self.task_explain(r, &test_case.sql, task, &flags).await?;
             }
         }
         Ok(result)
     }
 }
+
+lazy_static! {
+    static ref FLAGS_REGEX: Regex = Regex::new(r"\[(.*)\]").unwrap();
+}
+
+/// Extracts the flags of a task: the comma-separated, whitespace-trimmed items inside the square brackets matched by `FLAGS_REGEX`. Returns an empty list when the task has no bracketed section; never returns `Err`.
+/// For example, the flags for the task `explain[with_logical, verbose]` are `["with_logical", "verbose"]`.
+fn extract_flags(task: &str) -> Result<Vec<String>> {
+    if let Some(captures) = FLAGS_REGEX.captures(task) {
+        Ok(captures
+            .get(1)
+            .unwrap()
+            .as_str()
+            .split(',')
+            .map(|x| x.trim().to_string())
+            .collect())
+    } else {
+        Ok(vec![])
+    }
+}
diff --git a/optd-sqlplannertest/tests/constant_predicate.yml b/optd-sqlplannertest/tests/constant_predicate.yml
@@ -2,19 +2,19 @@
     create table t1(t1v1 int, t1v2 int);
     insert into t1 values (0, 0), (1, 1), (2, 2);
   tasks:
-    - execute_with_logical
+    - execute[with_logical]
 - sql: |
     select * from t1 where t1v1 = 0;
   desc: Test whether the optimizer handles integer equality predicates correctly.
   tasks:
-    - execute_with_logical
+    - execute[with_logical]
 - sql: |
     select * from t1 where t1v1 = 0 and t1v2 = 1;
   desc: Test whether the optimizer handles multiple integer equality predicates correctly.
   tasks:
-    - execute_with_logical
+    - execute[with_logical]
 - sql: |
     select * from t1 where t1v1 = 0 and t1v2 != 1;
   desc: Test whether the optimizer handles multiple integer inequality predicates correctly.
   tasks:
-    - execute_with_logical
+    - execute[with_logical]
diff --git a/optd-sqlplannertest/tests/join_enumerate.planner.sql b/optd-sqlplannertest/tests/join_enumerate.planner.sql
@@ -16,14 +16,7 @@ insert into t3 values (0, 300), (1, 301), (2, 302);
 select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
 
 /*
-(Join t2 (Join t1 t3))
-(Join t2 (Join t3 t1))
-(Join t3 (Join t1 t2))
-(Join t3 (Join t2 t1))
-(Join (Join t1 t2) t3)
-(Join (Join t1 t3) t2)
 (Join (Join t2 t1) t3)
-(Join (Join t3 t1) t2)
 
 0 200 0 0 0 300
 1 201 1 1 1 301
diff --git a/optd-sqlplannertest/tests/join_enumerate.yml b/optd-sqlplannertest/tests/join_enumerate.yml
@@ -6,16 +6,16 @@
     insert into t2 values (0, 200), (1, 201), (2, 202);
     insert into t3 values (0, 300), (1, 301), (2, 302);
   tasks:
-    - execute_with_logical
+    - execute[with_logical]
 - sql: |
     select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
   desc: Test whether the optimizer enumerates all join orders.
   tasks:
-    - explain_with_logical:logical_join_orders
-    - execute_with_logical
+    - explain[with_logical]:logical_join_orders
+    - execute[with_logical]
 - sql: |
     select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
   desc: Test whether the optimizer enumerates all join orders.
   tasks:
-    - explain_with_logical:logical_join_orders
-    - execute_with_logical
+    - explain[with_logical]:logical_join_orders
+    - execute[with_logical]
diff --git a/optd-sqlplannertest/tests/tpch.yml b/optd-sqlplannertest/tests/tpch.yml
@@ -156,7 +156,7 @@
       limit 100;
   desc: TPC-H Q2
   tasks:
-      - explain_with_logical:logical_optd,physical_optd
+      - explain[with_logical]:logical_optd,physical_optd
 - sql: |
       SELECT
           l_orderkey,
@@ -182,7 +182,7 @@
           o_orderdate LIMIT 10;
   desc: TPC-H Q3
   tasks:
-      - explain_with_logical:logical_optd,physical_optd
+      - explain[with_logical]:logical_optd,physical_optd
 - sql: |
       SELECT
           n_name AS nation,
@@ -493,7 +493,7 @@
           s_suppkey;
   desc: TPC-H Q15
   tasks:
-      - explain_with_logical:logical_optd,physical_optd
+      - explain[with_logical]:logical_optd,physical_optd
 - sql: |
       SELECT
           ROUND(SUM(l_extendedprice) / 7.0, 16) AS avg_yearly 
@@ -514,7 +514,7 @@
           );
   desc: TPC-H Q17
   tasks:
-      - explain_with_logical:logical_optd,physical_optd
+      - explain[with_logical]:logical_optd,physical_optd
 - sql: |
       SELECT
           sum(l_extendedprice* (1 - l_discount)) as revenue