cmu-db
diff --git a/‎Cargo.lock
Lines changed: 397 additions & 232 deletions b/‎Cargo.lock
Lines changed: 397 additions & 232 deletions
diff --git a/‎optd-sqlplannertest/Cargo.toml
Lines changed: 9 additions & 1 deletion b/‎optd-sqlplannertest/Cargo.toml
Lines changed: 9 additions & 1 deletion
diff --git a/‎optd-sqlplannertest/README.md
Lines changed: 50 additions & 7 deletions b/‎optd-sqlplannertest/README.md
Lines changed: 50 additions & 7 deletions
diff --git a/‎optd-sqlplannertest/benches/planner_bench.rs
Lines changed: 150 additions & 0 deletions b/‎optd-sqlplannertest/benches/planner_bench.rs
Lines changed: 150 additions & 0 deletions
diff --git a/‎optd-sqlplannertest/src/bench_helper.rs
Lines changed: 64 additions & 0 deletions b/‎optd-sqlplannertest/src/bench_helper.rs
Lines changed: 64 additions & 0 deletions
@@ -13,7 +13,7 @@ repository = { workspace = true }
 [dependencies]
 clap = { version = "4.5.4", features = ["derive"] }
 anyhow = { version = "1", features = ["backtrace"] }
-sqlplannertest = "0.3"
+sqlplannertest = "0.4"
 async-trait = "0.1"
 datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "43.0.0" }
 optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" }
@@ -40,6 +40,14 @@ optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" }
 itertools = "0.13"
 lazy_static = "1.4.0"
 
+[dev-dependencies]
+criterion = { version = "0.5.1", features = ["async_tokio"] }
+serde_yaml = "0.9"
+
 [[test]]
 name = "planner_test"
 harness = false
+
+[[bench]]
+name = "planner_bench"
+harness = false
@@ -3,14 +3,55 @@
 These test cases use the [sqlplannertest](https://crates.io/crates/sqlplannertest) crate to execute SQL queries and inspect their output.
 They do not check whether a plan is correct, and instead rely on a text-based diff of the query's output to determine whether something is different than the expected output.
 
+We also use this crate to generate benchmarks for evaluating optd's performance. With the help of the [criterion](https://crates.io/crates/criterion) crate, we can benchmark the planning time and the execution time of the physical plan produced by the optimizer.
 
 ## Execute Test Cases
 
+**Running all test cases**
+
 ```shell
 cargo test -p optd-sqlplannertest
 # or use nextest
 cargo nextest run -p optd-sqlplannertest
 ```
+
+**Running tests in specific modules or files**
+
+```shell
+# Running all test cases in the tpch module
+cargo nextest run -p optd-sqlplannertest tpch
+# Running all test cases in the tests/subqueries/subquery_unnesting.yml
+cargo nextest run -p optd-sqlplannertest subquery::subquery_unnesting
+```
+
+## Executing Benchmarks
+
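+There are two metrics we care about when evaluating optd's performance: the planning time and the execution time of the physical plan produced by the optimizer.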
+
+### Usage
+
+```shell
+# Benchmark all TPC-H queries with "bench" task enabled
+cargo bench --bench planner_bench tpch/
+
+# Benchmark TPC-H Q1
+cargo bench --bench planner_bench tpch/q1/
+
+# Benchmark TPC-H Q1 planning
+cargo bench --bench planner_bench tpch/q1/planning
+
+# Benchmark TPC-H Q1 execution
+cargo bench --bench planner_bench tpch/q1/execution
+
+# View the HTML report
+python3 -m http.server -d ./target/criterion/
+```
+
+### Limitations
+
+`planner_bench` can only handle `sqlplannertest` YAML test files that contain a single test case.
+
+
 ## Add New Test Case
 
 To add a SQL query tests, create a YAML file in a subdir in "tests".
@@ -30,11 +71,11 @@ Each file can contain multiple tests that are executed in sequential order from
     - explain:logical_optd,physical_optd
   desc: Equality predicate
 ```
-| Name       | Description                                                        |
-| ---------- | ------------------------------------------------------------------ |
-| `sql`      | List of SQL statements to execute separate by newlines             |
-| `tasks`    | How to execute the SQL statements. See [Tasks](#tasks) below       |
-| `desc`     | (Optional) Text description of what the test cases represents      |
+| Name    | Description                                                   |
+| ------- | ------------------------------------------------------------- |
+| `sql`   | List of SQL statements to execute separate by newlines        |
+| `tasks` | How to execute the SQL statements. See [Tasks](#tasks) below  |
+| `desc`  | (Optional) Text description of what the test cases represents |
 
 After adding the YAML file, you then need to use the update command to automatically create the matching SQL file that contains the expected output of the test cases.
 
@@ -46,14 +87,16 @@ The following commands will automatically update all of them for you. You should
 ```shell
 # Update all test cases
 cargo run -p optd-sqlplannertest --bin planner_test_apply
-# or, supply a list of directories to update
-cargo run -p optd-sqlplannertest --bin planner_test_apply -- subqueries
+# or, supply a list of modules or files to update
+cargo run -p optd-sqlplannertest --bin planner_test_apply -- subqueries tpch::q1
 ```
 
 ## Tasks
 
 The `explain` and `execute` task will be run with datafusion's logical optimizer disabled. Each task has some toggleable flags to control its behavior.
 
+The `bench` task is only used in benchmarks. A test case can only be executed as a benchmark if a bench task exists.
+
 ### `execute` Task
 
 #### Flags
 
@@ -0,0 +1,150 @@
+use std::{
+    future::Future,
+    path::{Path, PathBuf},
+};
+
+use anyhow::{bail, Context, Result};
+use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
+use optd_sqlplannertest::bench_helper::{
+    bench_run, bench_setup, ExecutionBenchRunner, PlannerBenchRunner, PlanningBenchRunner,
+};
+use sqlplannertest::{discover_tests_with_selections, parse_test_cases, TestCase};
+use tokio::runtime::Runtime;
+
+/// Criterion entry point: benchmarks the `tpch` selection in two passes,
+/// first with [`PlanningBenchRunner`]s and then with [`ExecutionBenchRunner`]s.
+///
+/// The execution pass hands every runner a copy of the contents of
+/// `tests/tpch/bench_populate.sql`.
+///
+/// # Panics
+///
+/// Panics if the populate script cannot be read or if either pass returns an
+/// error.
+fn criterion_benchmark(c: &mut Criterion) {
+    let selection = "tpch";
+    let selections = vec![String::from(selection)];
+    let tests_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests");
+
+    // Pass 1: planning benchmarks.
+    let make_planning_runner = || async { PlanningBenchRunner::new().await };
+    planner_bench_runner(&tests_dir, make_planning_runner, &selections, c).unwrap();
+
+    // Pass 2: execution benchmarks. The populate script is read once here and
+    // cloned for every runner the factory creates.
+    let populate_path = tests_dir.join(format!("{selection}/bench_populate.sql"));
+    let populate_sql = std::fs::read_to_string(&populate_path)
+        .with_context(|| format!("failed to read {}", populate_path.display()))
+        .unwrap();
+    let make_execution_runner = move || {
+        let sql = populate_sql.clone();
+        async { ExecutionBenchRunner::new(sql).await }
+    };
+    planner_bench_runner(&tests_dir, make_execution_runner, &selections, c).unwrap();
+}
+
+/// Finds every selected test file and benchmarks it.
+///
+/// `runner_fn` is a factory producing a runner that implements the
+/// [`PlannerBenchRunner`] trait; a clone of it is handed to each test file.
+///
+/// A test case gets benchmarked when:
+///
+/// 1. its file lives under `tests_dir` and matches `selections`, and
+/// 2. its task list contains `bench`.
+///
+/// ## Limitation
+///
+/// Only sqlplannertest files holding a single test case are accepted.
+fn planner_bench_runner<F, Ft, R>(
+    tests_dir: impl AsRef<Path>,
+    runner_fn: F,
+    selections: &[String],
+    c: &mut Criterion,
+) -> Result<()>
+where
+    F: Fn() -> Ft + Send + Sync + 'static + Clone,
+    Ft: Future<Output = Result<R>> + Send,
+    R: PlannerBenchRunner + 'static,
+{
+    // Resolve all (file, benchmark name) pairs up front so that a discovery
+    // failure is reported before any benchmark has started.
+    let mut discovered = Vec::new();
+    for entry in discover_tests_with_selections(&tests_dir, selections)? {
+        let file = entry?;
+        // The benchmark name is the file's path relative to `tests_dir`.
+        let name = file
+            .strip_prefix(&tests_dir)
+            .context("unable to relative path")?
+            .to_str()
+            .context("unable to convert to string")?
+            .to_owned();
+        discovered.push((file, name));
+    }
+
+    discovered
+        .into_iter()
+        .try_for_each(|(file, name)| bench_runner(file, name, runner_fn.clone(), c))
+}
+
+/// Benchmarks the single test case stored in the sqlplannertest file at `path`.
+///
+/// The file is read and parsed; if its only test case lists a task starting
+/// with `bench`, a criterion benchmark group named after `testname` (without
+/// a trailing `.yml`, if present) is created with one benchmark called
+/// `R::BENCH_NAME`. Every iteration obtains a fresh runner and input from
+/// [`bench_setup`] and then hands them to [`bench_run`].
+///
+/// Files whose test case has no `bench` task are skipped and `Ok(())` is
+/// returned.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be read or parsed, if it does not
+/// contain exactly one test case, or if the tokio runtime cannot be built.
+///
+/// # Panics
+///
+/// [`bench_setup`] and [`bench_run`] unwrap the runner's results, so a failing
+/// runner panics inside the benchmark.
+fn bench_runner<F, Ft, R>(
+    path: PathBuf,
+    testname: String,
+    runner_fn: F,
+    c: &mut Criterion,
+) -> Result<()>
+where
+    F: Fn() -> Ft + Send + Sync + 'static + Clone,
+    Ft: Future<Output = Result<R>> + Send,
+    R: PlannerBenchRunner,
+{
+    let raw = std::fs::read(&path)
+        .with_context(|| format!("failed to read {}", path.display()))?;
+    let testcases: Vec<TestCase> = serde_yaml::from_slice(&raw)
+        .with_context(|| format!("failed to parse {}", path.display()))?;
+
+    // `parse_test_cases` is given the directory that contains the test file.
+    let mut base_dir = path.clone();
+    base_dir.pop();
+    let testcases = parse_test_cases(base_dir, testcases)?;
+
+    if testcases.len() != 1 {
+        bail!(
+            "planner_bench can only handle sqlplannertest yml file with one test cases, {} has {}",
+            path.display(),
+            testcases.len()
+        );
+    }
+    let testcase = &testcases[0];
+
+    // Only test cases that opt in through a `bench` task are benchmarked.
+    if !testcase.tasks.iter().any(|task| task.starts_with("bench")) {
+        return Ok(());
+    }
+
+    // Report a runtime construction failure to the caller instead of panicking.
+    let runtime: Runtime = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .context("failed to build tokio runtime")?;
+
+    // Fall back to the full name rather than panicking when the file does not
+    // end in `.yml`.
+    let group_name = testname.strip_suffix(".yml").unwrap_or(&testname);
+    let mut group = c.benchmark_group(group_name);
+    group.bench_function(R::BENCH_NAME, |b| {
+        b.iter_batched(
+            || bench_setup(&runtime, runner_fn.clone(), testcase),
+            |(runner, input, flags)| {
+                bench_run(&runtime, runner, black_box(input), testcase, &flags)
+            },
+            BatchSize::PerIteration,
+        );
+    });
+    group.finish();
+
+    Ok(())
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
@@ -0,0 +1,64 @@
+pub mod execution;
+pub mod planning;
+
+use std::future::Future;
+
+use crate::TestFlags;
+use anyhow::Result;
+use tokio::runtime::Runtime;
+
+pub use execution::ExecutionBenchRunner;
+pub use planning::PlanningBenchRunner;
+
+/// A benchmark runner driven in two phases: [`setup`](Self::setup) prepares
+/// the input for a test case, then [`bench`](Self::bench) consumes the runner
+/// and that input to perform the work being benchmarked.
+pub trait PlannerBenchRunner {
+    /// Describes what the benchmark is evaluating.
+    ///
+    /// The lifetime is spelled out as `'static` instead of relying on
+    /// lifetime elision in an associated constant.
+    const BENCH_NAME: &'static str;
+    /// Benchmark's input, produced by [`setup`](Self::setup) and consumed by
+    /// [`bench`](Self::bench).
+    type BenchInput;
+
+    /// Sets up the necessary environment for the benchmark based on the test case.
+    ///
+    /// Returns the input needed for the benchmark together with the
+    /// [`TestFlags`] that are later passed to [`bench`](Self::bench).
+    fn setup(
+        &mut self,
+        test_case: &sqlplannertest::ParsedTestCase,
+    ) -> impl Future<Output = Result<(Self::BenchInput, TestFlags)>> + Send;
+
+    /// Runs the actual benchmark based on the test case and input.
+    ///
+    /// Takes `self` by value, so a runner serves exactly one `bench` call.
+    fn bench(
+        self,
+        input: Self::BenchInput,
+        test_case: &sqlplannertest::ParsedTestCase,
+        flags: &TestFlags,
+    ) -> impl Future<Output = Result<()>> + Send;
+}
+
+/// Blocking counterpart of [`PlannerBenchRunner::setup`].
+///
+/// Creates a runner through `runner_fn`, runs its `setup` for `testcase` on
+/// `runtime`, and returns the runner together with the benchmark input and
+/// flags that `setup` produced.
+///
+/// # Panics
+///
+/// Panics if creating the runner or running `setup` returns an error.
+pub fn bench_setup<F, Ft, R>(
+    runtime: &Runtime,
+    runner_fn: F,
+    testcase: &sqlplannertest::ParsedTestCase,
+) -> (R, R::BenchInput, TestFlags)
+where
+    F: Fn() -> Ft + Send + Sync + 'static + Clone,
+    Ft: Future<Output = Result<R>> + Send,
+    R: PlannerBenchRunner,
+{
+    let prepare = async {
+        let mut bench_runner = runner_fn().await.unwrap();
+        let (bench_input, test_flags) = bench_runner.setup(testcase).await.unwrap();
+        (bench_runner, bench_input, test_flags)
+    };
+    runtime.block_on(prepare)
+}
+
+/// Blocking counterpart of [`PlannerBenchRunner::bench`].
+///
+/// Drives `runner.bench(input, testcase, flags)` to completion on `runtime`.
+///
+/// # Panics
+///
+/// Panics if the benchmark returns an error.
+pub fn bench_run<R>(
+    runtime: &Runtime,
+    runner: R,
+    input: R::BenchInput,
+    testcase: &sqlplannertest::ParsedTestCase,
+    flags: &TestFlags,
+) where
+    R: PlannerBenchRunner,
+{
+    let task = async {
+        let outcome = runner.bench(input, testcase, flags).await;
+        outcome.unwrap()
+    };
+    runtime.block_on(task);
+}