Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 368bcd6

Browse files
feat: printing q-errors nicely + explains (#122)
**Summary**: Now printing aggregate q-errors, per-query q-errors, and EXPLAIN results to facilitate experimentation.

**Demo**:
![Screenshot 2024-03-21 at 13 34 13](https://github.com/cmu-db/optd/assets/20631215/795f42f8-42a6-45f2-bd5e-c44fd6b8cff3)
![Screenshot 2024-03-21 at 13 28 24](https://github.com/cmu-db/optd/assets/20631215/f9b1be74-1e48-4878-a4f9-26c7f68dd49c)

**Details**:
* Automatically logs the `EXPLAIN` results of all queries from all DBs at the `info` log level. Use `RUST_LOG=info` to see them. Use `--query-ids X` to investigate just one specific query.
* Now handles the edge case where the file indicating that a db was created exists but the db doesn't actually exist.
* Gracefully handles the case of infinite q-errors.
* Deleted dev_scripts. Moved to [gungnir-experiments](https://github.com/wangpatrick57/gungnir-experiments).
* Uses `prettytable-rs` for printing.
1 parent cdef513 commit 368bcd6

11 files changed

+448
-141
lines changed

Cargo.lock

+280-40
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev_scripts/kill_postgres.sh

-22
This file was deleted.

dev_scripts/start_postgres.sh

-15
This file was deleted.

optd-perftest/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ env_logger = "0.11"
3939
lazy_static = "1.4.0"
4040
tokio-util = "0.7"
4141
futures-util = "0.3"
42+
statistical = "1.0"
43+
prettytable-rs = "0.10"
4244

4345
[dev_dependencies]
4446
assert_cmd = "2.0"

optd-perftest/src/cardtest.rs

+27-27
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,21 @@ use crate::{benchmark::Benchmark, datafusion_db::DatafusionDb, tpch::TpchConfig}
77
use anyhow::{self};
88
use async_trait::async_trait;
99

10-
/// This struct performs cardinality testing across one or more databases.
11-
/// Another design would be for the CardtestRunnerDBHelper trait to expose a function
10+
/// This struct performs cardinality testing across one or more dbmss.
11+
/// Another design would be for the CardtestRunnerDBMSHelper trait to expose a function
1212
/// to evaluate the Q-error. However, I chose not to do this design for reasons
13-
/// described in the comments of the CardtestRunnerDBHelper trait. This is why
14-
/// you would use CardtestRunner even for computing the Q-error of a single database.
13+
/// described in the comments of the CardtestRunnerDBMSHelper trait. This is why
14+
/// you would use CardtestRunner even for computing the Q-error of a single dbms.
1515
pub struct CardtestRunner {
16-
pub databases: Vec<Box<dyn CardtestRunnerDBHelper>>,
16+
pub dbmss: Vec<Box<dyn CardtestRunnerDBMSHelper>>,
1717
}
1818

1919
impl CardtestRunner {
20-
pub async fn new(databases: Vec<Box<dyn CardtestRunnerDBHelper>>) -> anyhow::Result<Self> {
21-
Ok(CardtestRunner { databases })
20+
pub async fn new(dbmss: Vec<Box<dyn CardtestRunnerDBMSHelper>>) -> anyhow::Result<Self> {
21+
Ok(CardtestRunner { dbmss })
2222
}
2323

24-
/// Get the Q-error of a query using the cost models of all databases being tested
24+
/// Get the Q-error of a query using the cost models of all dbmss being tested
2525
/// Q-error is defined in [Leis 2015](https://15721.courses.cs.cmu.edu/spring2024/papers/16-costmodels/p204-leis.pdf)
2626
/// One detail not specified in the paper is that Q-error is based on the ratio of true and estimated cardinality
2727
/// of the entire query, not of a subtree of the query. This detail is specified in Section 7.1 of
@@ -32,16 +32,16 @@ impl CardtestRunner {
3232
) -> anyhow::Result<HashMap<String, Vec<f64>>> {
3333
let mut qerrors_alldbs = HashMap::new();
3434

35-
for database in &mut self.databases {
36-
let estcards = database.eval_benchmark_estcards(benchmark).await?;
37-
let truecards = database.eval_benchmark_truecards(benchmark).await?;
35+
for dbms in &mut self.dbmss {
36+
let estcards = dbms.eval_benchmark_estcards(benchmark).await?;
37+
let truecards = dbms.eval_benchmark_truecards(benchmark).await?;
3838
assert!(truecards.len() == estcards.len());
3939
let qerrors = estcards
4040
.into_iter()
4141
.zip(truecards.into_iter())
4242
.map(|(estcard, truecard)| CardtestRunner::calc_qerror(estcard, truecard))
4343
.collect();
44-
qerrors_alldbs.insert(String::from(database.get_name()), qerrors);
44+
qerrors_alldbs.insert(String::from(dbms.get_name()), qerrors);
4545
}
4646

4747
Ok(qerrors_alldbs)
@@ -55,27 +55,27 @@ impl CardtestRunner {
5555
}
5656
}
5757

58-
/// This trait defines helper functions to enable cardinality testing on a database
58+
/// This trait defines helper functions to enable cardinality testing on a dbms
5959
/// The reason a "get qerror" function is not exposed is to allow for greater
60-
/// flexibility. If we exposed "get qerror" for each database, we would need to
61-
/// get the true and estimated cardinalities for _each_ database. However, we
62-
/// can now choose to only get the true cardinalities of _one_ database to
60+
/// flexibility. If we exposed "get qerror" for each dbms, we would need to
61+
/// get the true and estimated cardinalities for _each_ dbms. However, we
62+
/// can now choose to only get the true cardinalities of _one_ dbms to
6363
/// improve performance or even cache the true cardinalities. Additionally, if
64-
/// we do want to get the true cardinalities of all databases, we can compare
64+
/// we do want to get the true cardinalities of all dbmss, we can compare
6565
/// them against each other to ensure they're all equal. All these options are
6666
/// possible when exposing "get true card" and "get est card" instead of a
6767
/// single "get qerror". If you want to compute the Q-error of a single
68-
/// database, just create a CardtestRunner with a single database as input.
68+
/// dbms, just create a CardtestRunner with a single dbms as input.
6969
/// When exposing a "get true card" and "get est card" interface, you could
7070
/// ostensibly do it on the granularity of a single SQL string or on the
7171
/// granularity of an entire benchmark. I chose the latter for a simple reason:
72-
/// different databases might have different SQL strings for the same conceptual
73-
/// query (see how qgen in tpch-kit takes in database as an input).
72+
/// different dbmss might have different SQL strings for the same conceptual
73+
/// query (see how qgen in tpch-kit takes in dbms as an input).
7474
/// When more performance tests are implemented, you would probably want to extract
75-
/// get_name() into a generic "Database" trait.
75+
/// get_name() into a generic "DBMS" trait.
7676
#[async_trait]
77-
pub trait CardtestRunnerDBHelper {
78-
// get_name() has &self so that we're able to do Box<dyn CardtestRunnerDBHelper>
77+
pub trait CardtestRunnerDBMSHelper {
78+
// get_name() has &self so that we're able to do Box<dyn CardtestRunnerDBMSHelper>
7979
fn get_name(&self) -> &str;
8080

8181
// The order of queries has to be the same between these two functions.
@@ -97,12 +97,12 @@ pub async fn cardtest<P: AsRef<Path> + Clone>(
9797
) -> anyhow::Result<HashMap<String, Vec<f64>>> {
9898
let pg_db = PostgresDb::new(workspace_dpath.clone(), pguser, pgpassword);
9999
let df_db = DatafusionDb::new(workspace_dpath).await?;
100-
let databases: Vec<Box<dyn CardtestRunnerDBHelper>> = vec![Box::new(pg_db), Box::new(df_db)];
100+
let dbmss: Vec<Box<dyn CardtestRunnerDBMSHelper>> = vec![Box::new(pg_db), Box::new(df_db)];
101101

102102
let tpch_benchmark = Benchmark::Tpch(tpch_config.clone());
103-
let mut cardtest_runner = CardtestRunner::new(databases).await?;
104-
let qerrors = cardtest_runner
103+
let mut cardtest_runner = CardtestRunner::new(dbmss).await?;
104+
let qerrors_alldbs = cardtest_runner
105105
.eval_benchmark_qerrors_alldbs(&tpch_benchmark)
106106
.await?;
107-
Ok(qerrors)
107+
Ok(qerrors_alldbs)
108108
}

optd-perftest/src/datafusion_db.rs

+13-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::{
66

77
use crate::{
88
benchmark::Benchmark,
9-
cardtest::CardtestRunnerDBHelper,
9+
cardtest::CardtestRunnerDBMSHelper,
1010
tpch::{TpchConfig, TpchKit},
1111
};
1212
use async_trait::async_trait;
@@ -35,7 +35,7 @@ pub struct DatafusionDb {
3535
}
3636

3737
#[async_trait]
38-
impl CardtestRunnerDBHelper for DatafusionDb {
38+
impl CardtestRunnerDBMSHelper for DatafusionDb {
3939
fn get_name(&self) -> &str {
4040
"DataFusion"
4141
}
@@ -170,11 +170,22 @@ impl DatafusionDb {
170170
Ok(num_rows)
171171
}
172172

173+
fn log_explain(&self, explains: &[Vec<String>]) {
174+
// row_cnt is exclusively in physical_plan after optd
175+
let physical_plan_after_optd_lines = explains
176+
.iter()
177+
.find(|explain| explain.first().unwrap() == "physical_plan after optd")
178+
.unwrap();
179+
let explain_str = physical_plan_after_optd_lines.join("\n");
180+
log::info!("{} {}", self.get_name(), explain_str);
181+
}
182+
173183
async fn eval_query_estcard(&self, sql: &str) -> anyhow::Result<usize> {
174184
lazy_static! {
175185
static ref ROW_CNT_RE: Regex = Regex::new(r"row_cnt=(\d+\.\d+)").unwrap();
176186
}
177187
let explains = Self::execute(&self.ctx, &format!("explain verbose {}", sql)).await?;
188+
self.log_explain(&explains);
178189
// Find first occurrence of row_cnt=... in the output.
179190
let row_cnt = explains
180191
.iter()

optd-perftest/src/main.rs

+68-7
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
use clap::{Parser, Subcommand};
12
use optd_perftest::cardtest;
23
use optd_perftest::shell;
34
use optd_perftest::tpch::{TpchConfig, TPCH_KIT_POSTGRES};
4-
use std::fs;
5-
6-
use clap::{Parser, Subcommand};
5+
use prettytable::{format, Cell, Row, Table};
6+
use std::{fs, iter};
77

88
#[derive(Parser)]
99
struct Cli {
@@ -66,14 +66,75 @@ async fn main() -> anyhow::Result<()> {
6666
pgpassword,
6767
} => {
6868
let tpch_config = TpchConfig {
69-
database: String::from(TPCH_KIT_POSTGRES),
69+
dbms: String::from(TPCH_KIT_POSTGRES),
7070
scale_factor,
7171
seed,
72-
query_ids,
72+
query_ids: query_ids.clone(),
7373
};
74-
let qerrors =
74+
let qerrors_alldbs =
7575
cardtest::cardtest(&workspace_dpath, &pguser, &pgpassword, tpch_config).await?;
76-
println!("qerrors={:?}", qerrors);
76+
println!(" Aggregate Q-Error Comparison");
77+
let mut agg_qerror_table = Table::new();
78+
agg_qerror_table.set_titles(prettytable::row![
79+
"DBMS",
80+
"Median",
81+
"# Infinite",
82+
"Mean",
83+
"Min",
84+
"Max"
85+
]);
86+
for (dbms, qerrors) in &qerrors_alldbs {
87+
if !qerrors.is_empty() {
88+
let finite_qerrors: Vec<f64> = qerrors
89+
.clone()
90+
.into_iter()
91+
.filter(|&qerror| qerror.is_finite())
92+
.collect();
93+
let ninf_qerrors = qerrors.len() - finite_qerrors.len();
94+
let mean_qerror =
95+
finite_qerrors.iter().sum::<f64>() / finite_qerrors.len() as f64;
96+
let min_qerror = finite_qerrors
97+
.iter()
98+
.min_by(|a, b| a.partial_cmp(b).unwrap())
99+
.unwrap();
100+
let median_qerror = statistical::median(qerrors);
101+
let max_qerror = finite_qerrors
102+
.iter()
103+
.max_by(|a, b| a.partial_cmp(b).unwrap())
104+
.unwrap();
105+
agg_qerror_table.add_row(prettytable::row![
106+
dbms,
107+
median_qerror,
108+
ninf_qerrors,
109+
mean_qerror,
110+
min_qerror,
111+
max_qerror
112+
]);
113+
} else {
114+
agg_qerror_table
115+
.add_row(prettytable::row![dbms, "N/A", "N/A", "N/A", "N/A", "N/A"]);
116+
}
117+
}
118+
agg_qerror_table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
119+
agg_qerror_table.printstd();
120+
121+
let mut per_query_qerror_table = Table::new();
122+
println!(" Per-Query Q-Error Comparison");
123+
let title_cells = iter::once(Cell::new("Query #"))
124+
.chain(qerrors_alldbs.keys().map(|dbms| Cell::new(dbms)))
125+
.collect();
126+
per_query_qerror_table.set_titles(Row::new(title_cells));
127+
for (i, query_id) in query_ids.iter().enumerate() {
128+
let mut row_cells = vec![];
129+
row_cells.push(prettytable::cell!(query_id));
130+
for qerrors in qerrors_alldbs.values() {
131+
let qerror = qerrors.get(i).unwrap();
132+
row_cells.push(prettytable::cell!(qerror));
133+
}
134+
per_query_qerror_table.add_row(Row::new(row_cells));
135+
}
136+
per_query_qerror_table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
137+
per_query_qerror_table.printstd();
77138
}
78139
}
79140

0 commit comments

Comments
 (0)