Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit b52f5ed

Browse files
feat: CLI for optd-perftest (#106)
**Summary**: An extensible CLI built with `clap` for the `optd-perftest` package.

**Demo**: https://github.com/cmu-db/optd/assets/20631215/05315dcc-6a91-4c4d-a9ea-fdb8d15ba675

**Details**:
* The CLI currently exposes only the `cardtest` subcommand (cardinality testing), but it can be extended to support more types of performance tests.
* The `cardtest` subcommand takes the scale factor and the seed as options; both have default values.
* Fixed a bug where we forgot to reconnect to Postgres after reinitializing pgdata.
1 parent 30c76e8 commit b52f5ed

File tree

8 files changed

+154
-27
lines changed

8 files changed

+154
-27
lines changed

Cargo.lock

+52-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

mac_kill_postgres.sh

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
2+
# a sketchy way to kill "orphaned" postgres processes (i.e. processes whose pgdata dir has already been deleted)
3+
# it is named "mac" because it is unknown whether it also works on Linux
4+
# use this script to avoid having to manually do "ps aux" and "kill [pid]" every time cardtest_integration (or something similar) fails
5+
6+
# the "main" process has bin/postgres in it
7+
# we kill it in a loop because sometimes it's required for some reason I don't understand
8+
while pid=$(ps aux | grep bin/postgres | grep -v grep | head -n1 | awk '{print $2}'); do
9+
if [ -z "$pid" ]; then
10+
break
11+
else
12+
# we'll sometimes kill pids that don't exist. don't show the error in these cases
13+
kill $pid &>/dev/null
14+
sleep 1 # sleep so we don't loop too fast
15+
fi
16+
done
17+
18+
# even after it's gone, wait for it to completely shut down
19+
sleep 1

optd-perftest/Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,8 @@ tokio = { version = "1.24", features = [
2020
shlex = "1.3"
2121
tokio-postgres = "0.7"
2222
regex = "1.10"
23+
clap = { version = "4.5", features = [
24+
"derive",
25+
] }
2326
log = "0.4"
2427
env_logger = "0.11"

optd-perftest/src/cardtest.rs

+14-5
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ impl CardtestRunner {
2525
/// of the entire query, not of a subtree of the query. This detail is specified in Section 7.1 of
2626
/// [Yang 2020](https://arxiv.org/pdf/2006.08109.pdf)
2727
pub async fn eval_benchmark_qerrors_alldbs(
28-
&self,
28+
&mut self,
2929
benchmark: &Benchmark,
3030
) -> anyhow::Result<HashMap<String, Vec<f64>>> {
3131
let mut qerrors_alldbs = HashMap::new();
3232

33-
for database in &self.databases {
33+
for database in &mut self.databases {
3434
let estcards = database.eval_benchmark_estcards(benchmark).await?;
3535
let truecards = database.eval_benchmark_truecards(benchmark).await?;
3636
assert!(truecards.len() == estcards.len());
@@ -76,7 +76,16 @@ pub trait CardtestRunnerDBHelper {
7676
// get_name() has &self so that we're able to do Box<dyn CardtestRunnerDBHelper>
7777
fn get_name(&self) -> &str;
7878

79-
// the order of queries has to be the same between these two functions
80-
async fn eval_benchmark_estcards(&self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>>;
81-
async fn eval_benchmark_truecards(&self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>>;
79+
// The order of queries has to be the same between these two functions.
80+
// They take mutable references because evaluation sometimes involves mutating self.
81+
// One example of this is in PostgresDb where we may need to reconnect to the database,
82+
// which requires modifying the PostgresDb object.
83+
async fn eval_benchmark_estcards(
84+
&mut self,
85+
benchmark: &Benchmark,
86+
) -> anyhow::Result<Vec<usize>>;
87+
async fn eval_benchmark_truecards(
88+
&mut self,
89+
benchmark: &Benchmark,
90+
) -> anyhow::Result<Vec<usize>>;
8291
}

optd-perftest/src/datafusion_db_cardtest.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,17 @@ impl CardtestRunnerDBHelper for DatafusionDb {
88
"DataFusion"
99
}
1010

11-
async fn eval_benchmark_truecards(&self, _benchmark: &Benchmark) -> anyhow::Result<Vec<usize>> {
11+
async fn eval_benchmark_truecards(
12+
&mut self,
13+
_benchmark: &Benchmark,
14+
) -> anyhow::Result<Vec<usize>> {
1215
Ok(vec![])
1316
}
1417

15-
async fn eval_benchmark_estcards(&self, _benchmark: &Benchmark) -> anyhow::Result<Vec<usize>> {
18+
async fn eval_benchmark_estcards(
19+
&mut self,
20+
_benchmark: &Benchmark,
21+
) -> anyhow::Result<Vec<usize>> {
1622
Ok(vec![])
1723
}
1824
}

optd-perftest/src/main.rs

+37-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
use anyhow::Result;
21
use cardtest::{CardtestRunner, CardtestRunnerDBHelper};
2+
use clap::{Parser, Subcommand};
33
use postgres_db::PostgresDb;
44

55
use crate::{
@@ -14,18 +14,47 @@ mod postgres_db;
1414
mod shell;
1515
mod tpch;
1616

17+
#[derive(Parser)]
18+
struct Cli {
19+
#[command(subcommand)]
20+
command: Commands,
21+
}
22+
23+
#[derive(Subcommand)]
24+
enum Commands {
25+
Cardtest {
26+
#[arg(long)]
27+
#[clap(default_value = "0.01")]
28+
scale_factor: f64,
29+
#[arg(long)]
30+
#[clap(default_value = "15721")]
31+
seed: i32,
32+
},
33+
}
34+
1735
#[tokio::main]
18-
async fn main() -> Result<()> {
36+
async fn main() -> anyhow::Result<()> {
1937
env_logger::init();
38+
let cli = Cli::parse();
39+
40+
match &cli.command {
41+
Commands::Cardtest { scale_factor, seed } => {
42+
let tpch_config = TpchConfig {
43+
database: String::from(TPCH_KIT_POSTGRES),
44+
scale_factor: *scale_factor,
45+
seed: *seed,
46+
};
47+
cardtest(tpch_config).await
48+
}
49+
}
50+
}
51+
52+
async fn cardtest(tpch_config: TpchConfig) -> anyhow::Result<()> {
2053
let pg_db = PostgresDb::build().await?;
2154
let databases: Vec<Box<dyn CardtestRunnerDBHelper>> = vec![Box::new(pg_db)];
22-
let tpch_config = TpchConfig {
23-
database: String::from(TPCH_KIT_POSTGRES),
24-
scale_factor: 1,
25-
seed: 15721,
26-
};
55+
2756
let tpch_benchmark = Benchmark::Tpch(tpch_config.clone());
28-
let cardtest_runner = CardtestRunner::new(databases).await?;
57+
let mut cardtest_runner = CardtestRunner::new(databases).await?;
2958
let qerrors = cardtest_runner
3059
.eval_benchmark_qerrors_alldbs(&tpch_benchmark)
3160
.await?;

optd-perftest/src/postgres_db.rs

+16-5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ pub struct PostgresDb {
3333
/// - Setup should be done in build() unless it requires more information (like benchmark)
3434
impl PostgresDb {
3535
pub async fn build() -> anyhow::Result<Self> {
36+
log::debug!("[start] building PostgresDb");
37+
3638
// build paths, sometimes creating them if they don't exist
3739
let curr_dpath = env::current_dir()?;
3840
let postgres_db_dpath = Path::new(file!())
@@ -61,6 +63,7 @@ impl PostgresDb {
6163
db.start_postgres().await?;
6264
db.connect_to_postgres().await?;
6365

66+
log::debug!("[end] building PostgresDb");
6467
Ok(db)
6568
}
6669

@@ -178,7 +181,7 @@ impl PostgresDb {
178181
Ok(())
179182
}
180183

181-
async fn load_benchmark_data(&self, benchmark: &Benchmark) -> anyhow::Result<()> {
184+
async fn load_benchmark_data(&mut self, benchmark: &Benchmark) -> anyhow::Result<()> {
182185
let benchmark_stringid = benchmark.get_stringid();
183186
if benchmark.is_readonly() {
184187
let done_fname = format!("{}_done", benchmark_stringid);
@@ -200,7 +203,7 @@ impl PostgresDb {
200203
}
201204

202205
/// Load the benchmark data without worrying about caching
203-
async fn load_benchmark_data_raw(&self, benchmark: &Benchmark) -> anyhow::Result<()> {
206+
async fn load_benchmark_data_raw(&mut self, benchmark: &Benchmark) -> anyhow::Result<()> {
204207
match benchmark {
205208
Benchmark::Tpch(tpch_config) => self.load_tpch_data_raw(tpch_config).await?,
206209
_ => unimplemented!(),
@@ -209,13 +212,15 @@ impl PostgresDb {
209212
}
210213

211214
/// Load the TPC-H data without worrying about caching
212-
async fn load_tpch_data_raw(&self, tpch_config: &TpchConfig) -> anyhow::Result<()> {
215+
async fn load_tpch_data_raw(&mut self, tpch_config: &TpchConfig) -> anyhow::Result<()> {
213216
// start from a clean slate
214217
self.remove_pgdata().await?;
215218
// since we deleted pgdata we'll need to re-init it
216219
self.init_pgdata().await?;
217220
// postgres must be started again since remove_pgdata() stops it
218221
self.start_postgres().await?;
222+
// remove_pgdata() stopped the old postgres server, so the old connection is no longer usable and we have to reconnect
223+
self.connect_to_postgres().await?;
219224
// load the schema
220225
let tpch_kit = TpchKit::build()?;
221226
shell::run_command_with_status_check(&format!(
@@ -248,15 +253,21 @@ impl CardtestRunnerDBHelper for PostgresDb {
248253
"Postgres"
249254
}
250255

251-
async fn eval_benchmark_estcards(&self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>> {
256+
async fn eval_benchmark_estcards(
257+
&mut self,
258+
benchmark: &Benchmark,
259+
) -> anyhow::Result<Vec<usize>> {
252260
self.load_benchmark_data(benchmark).await?;
253261
match benchmark {
254262
Benchmark::Test => unimplemented!(),
255263
Benchmark::Tpch(tpch_config) => self.eval_tpch_estcards(tpch_config).await,
256264
}
257265
}
258266

259-
async fn eval_benchmark_truecards(&self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>> {
267+
async fn eval_benchmark_truecards(
268+
&mut self,
269+
benchmark: &Benchmark,
270+
) -> anyhow::Result<Vec<usize>> {
260271
self.load_benchmark_data(benchmark).await?;
261272
match benchmark {
262273
Benchmark::Test => unimplemented!(),

optd-perftest/src/tpch.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ const TPCH_KIT_REPO_URL: &str = "git@github.com:lmwnshn/tpch-kit.git";
1111
pub const TPCH_KIT_POSTGRES: &str = "POSTGRESQL";
1212
const NUM_TPCH_QUERIES: usize = 22;
1313

14-
#[derive(Clone)]
14+
#[derive(Clone, Debug)]
1515
pub struct TpchConfig {
1616
pub database: String,
17-
pub scale_factor: i32,
17+
pub scale_factor: f64,
1818
pub seed: i32,
1919
}
2020

@@ -44,6 +44,8 @@ pub struct TpchKit {
4444
/// I keep the same conventions for these methods as I do for PostgresDb
4545
impl TpchKit {
4646
pub fn build() -> io::Result<Self> {
47+
log::debug!("[start] building TpchKit");
48+
4749
// build paths, sometimes creating them if they don't exist
4850
// we assume that this is being run in the base optd repo dir
4951
let optd_repo_dpath = env::current_dir()?;
@@ -88,6 +90,7 @@ impl TpchKit {
8890
// do setup after creating kit
8991
kit.clonepull_tpch_kit_repo()?;
9092

93+
log::debug!("[end] building TpchKit");
9194
Ok(kit)
9295
}
9396

0 commit comments

Comments
 (0)