Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit af92dca

Browse files
feat: integration test for cardinality performance testing (#113)
**Summary**: An integration test for cardinality performance testing. **Demo**: https://github.com/cmu-db/optd/assets/20631215/8eda2a04-0c23-4646-932d-8c0e22b249e4 **Details**: * The integration test requires Postgres to be running. * The integration test runs in its own workspace that's fully deleted at the start of each test. * The test runs the cardtest twice to test that it works both with and without cached files in the workspace. * Changed parts of CLI ("," delimiter for `--query-ids` instead of " ", added defaults for `--query-ids`, etc.). * Refactored modules into `lib.rs` so that `tests/` can access all modules. * Added `pguser` and `pgpassword` args to CLI. * Changed COPY IN to be from STDIN instead of from a file so that we can load even to Postgres processes in a container or on another machine.
1 parent 7bb9bee commit af92dca

File tree

13 files changed

+236
-93
lines changed

13 files changed

+236
-93
lines changed

.github/workflows/CI.yaml

+16
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,22 @@ env:
1414
jobs:
1515
check:
1616
runs-on: ubuntu-latest
17+
18+
services:
19+
postgres:
20+
image: postgres:15
21+
env:
22+
POSTGRES_USER: test_user
23+
POSTGRES_PASSWORD: password
24+
POSTGRES_DB: postgres
25+
ports:
26+
- 5432:5432
27+
options: >-
28+
--health-cmd pg_isready
29+
--health-interval 10s
30+
--health-timeout 5s
31+
--health-retries 5
32+
1733
steps:
1834
- uses: actions/checkout@v2
1935
- uses: actions-rs/toolchain@v1

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
/.vscode
33
/.DS_Store
44
/.idea
5-
.history
5+
.history
6+
**/*_workspace/**

Cargo.lock

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev_scripts/kill_postgres.sh

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
if [[ "$(whoami)" == "patrickwang" ]]; then
3+
# a sketchy way to kill "orphaned" postgres processes (i.e. processes whose pgdata dir has already been deleted)
4+
# use this script to avoid having to manually do "ps aux" and "kill [pid]" every time cardtest_integration (or something similar) fails
5+
# the "main" process has bin/postgres in it
6+
# we kill it in a loop because sometimes it's required for some reason I don't understand
7+
while pid=$(ps aux | grep bin/postgres | grep -v grep | head -n1 | awk '{print $2}'); do
8+
if [ -z "$pid" ]; then
9+
break
10+
else
11+
# we'll sometimes kill pids that don't exist. don't show the error in these cases
12+
kill $pid &>/dev/null
13+
sleep 1 # sleep so we don't loop too fast
14+
fi
15+
done
16+
17+
# even after it's gone, wait for it to completely shut down
18+
sleep 1
19+
else
20+
echo "unimplemented" >&2
21+
exit 1
22+
fi

dev_scripts/start_postgres.sh

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
2+
if [[ "$(whoami)" == "patrickwang" ]]; then
3+
rm -rf ~/pgdata
4+
cd ~/pgdata
5+
initdb
6+
cd -
7+
pg_ctl start
8+
# default_user is used for cargo run --bin optd-perftest
9+
psql -d postgres -c "CREATE USER default_user WITH SUPERUSER PASSWORD 'password';"
10+
# test_user is used for cargo test --package optd-perftest
11+
psql -d postgres -c "CREATE USER test_user WITH SUPERUSER PASSWORD 'password';"
12+
else
13+
echo "unimplemented" >&2
14+
exit 1
15+
fi

mac_kill_postgres.sh

-19
This file was deleted.

optd-perftest/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,8 @@ clap = { version = "4.5", features = [
3636
log = "0.4"
3737
env_logger = "0.11"
3838
lazy_static = "1.4.0"
39+
tokio-util = "0.7"
40+
futures-util = "0.3"
41+
42+
[dev-dependencies]
43+
assert_cmd = "2.0"

optd-perftest/src/cardtest.rs

+22-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
use std::collections::HashMap;
2+
use std::path::Path;
3+
4+
use crate::postgres_db::PostgresDb;
5+
use crate::{benchmark::Benchmark, datafusion_db_cardtest::DatafusionDb, tpch::TpchConfig};
26

37
use anyhow::{self};
48
use async_trait::async_trait;
59

6-
use crate::benchmark::Benchmark;
7-
810
/// This struct performs cardinality testing across one or more databases.
911
/// Another design would be for the CardtestRunnerDBHelper trait to expose a function
1012
/// to evaluate the Q-error. However, I chose not to do this design for reasons
@@ -86,3 +88,21 @@ pub trait CardtestRunnerDBHelper {
8688
benchmark: &Benchmark,
8789
) -> anyhow::Result<Vec<usize>>;
8890
}
91+
92+
pub async fn cardtest<P: AsRef<Path> + Clone>(
93+
workspace_dpath: P,
94+
pguser: &str,
95+
pgpassword: &str,
96+
tpch_config: TpchConfig,
97+
) -> anyhow::Result<HashMap<String, Vec<f64>>> {
98+
let pg_db = PostgresDb::new(workspace_dpath.clone(), pguser, pgpassword);
99+
let df_db = DatafusionDb::new(workspace_dpath).await?;
100+
let databases: Vec<Box<dyn CardtestRunnerDBHelper>> = vec![Box::new(pg_db), Box::new(df_db)];
101+
102+
let tpch_benchmark = Benchmark::Tpch(tpch_config.clone());
103+
let mut cardtest_runner = CardtestRunner::new(databases).await?;
104+
let qerrors = cardtest_runner
105+
.eval_benchmark_qerrors_alldbs(&tpch_benchmark)
106+
.await?;
107+
Ok(qerrors)
108+
}

optd-perftest/src/lib.rs

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
mod benchmark;
2+
pub mod cardtest;
3+
mod datafusion_db_cardtest;
4+
mod postgres_db;
5+
pub mod shell;
6+
pub mod tpch;

optd-perftest/src/main.rs

+26-34
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,19 @@
1-
use std::{fs, path::Path};
1+
use optd_perftest::cardtest;
2+
use optd_perftest::shell;
3+
use optd_perftest::tpch::{TpchConfig, TPCH_KIT_POSTGRES};
4+
use std::fs;
25

3-
use cardtest::{CardtestRunner, CardtestRunnerDBHelper};
46
use clap::{Parser, Subcommand};
5-
use postgres_db::PostgresDb;
6-
7-
use crate::{
8-
benchmark::Benchmark,
9-
datafusion_db_cardtest::DatafusionDb,
10-
tpch::{TpchConfig, TPCH_KIT_POSTGRES},
11-
};
12-
13-
mod benchmark;
14-
mod cardtest;
15-
mod datafusion_db_cardtest;
16-
mod postgres_db;
17-
mod shell;
18-
mod tpch;
197

208
#[derive(Parser)]
219
struct Cli {
2210
#[arg(long)]
23-
#[clap(default_value = "../optd_perftest_workspace")]
11+
#[clap(default_value = "optd_perftest_workspace")]
2412
#[clap(
2513
help = "The directory where artifacts required for performance testing (such as pgdata or TPC-H queries) are generated. See comment of parse_pathstr() to see what paths are allowed (TLDR: absolute and relative both ok)."
2614
)]
2715
workspace: String,
16+
2817
#[command(subcommand)]
2918
command: Commands,
3019
}
@@ -35,12 +24,26 @@ enum Commands {
3524
#[arg(long)]
3625
#[clap(default_value = "0.01")]
3726
scale_factor: f64,
27+
3828
#[arg(long)]
3929
#[clap(default_value = "15721")]
4030
seed: i32,
31+
4132
#[arg(long)]
42-
#[clap(value_delimiter = ' ', num_args = 1..)]
33+
#[clap(value_delimiter = ',', num_args = 1..)]
34+
// this is the current list of all queries that work in perftest
35+
#[clap(default_value = "2,3,5,7,8,9,10,12,14,17")]
4336
query_ids: Vec<u32>,
37+
38+
#[arg(long)]
39+
#[clap(default_value = "default_user")]
40+
#[clap(help = "The name of a user with superuser privileges")]
41+
pguser: String,
42+
43+
#[arg(long)]
44+
#[clap(default_value = "password")]
45+
#[clap(help = "The password of the user with superuser privileges")]
46+
pgpassword: String,
4447
},
4548
}
4649

@@ -59,31 +62,20 @@ async fn main() -> anyhow::Result<()> {
5962
scale_factor,
6063
seed,
6164
query_ids,
65+
pguser,
66+
pgpassword,
6267
} => {
6368
let tpch_config = TpchConfig {
6469
database: String::from(TPCH_KIT_POSTGRES),
6570
scale_factor,
6671
seed,
6772
query_ids,
6873
};
69-
cardtest(&workspace_dpath, tpch_config).await
74+
let qerrors =
75+
cardtest::cardtest(&workspace_dpath, &pguser, &pgpassword, tpch_config).await?;
76+
println!("qerrors={:?}", qerrors);
7077
}
7178
}
72-
}
73-
74-
async fn cardtest<P: AsRef<Path> + Clone>(
75-
workspace_dpath: P,
76-
tpch_config: TpchConfig,
77-
) -> anyhow::Result<()> {
78-
let pg_db = PostgresDb::new(workspace_dpath.clone());
79-
let df_db = DatafusionDb::new(workspace_dpath).await?;
80-
let databases: Vec<Box<dyn CardtestRunnerDBHelper>> = vec![Box::new(pg_db), Box::new(df_db)];
8179

82-
let tpch_benchmark = Benchmark::Tpch(tpch_config.clone());
83-
let mut cardtest_runner = CardtestRunner::new(databases).await?;
84-
let qerrors = cardtest_runner
85-
.eval_benchmark_qerrors_alldbs(&tpch_benchmark)
86-
.await?;
87-
println!("qerrors: {:?}", qerrors);
8880
Ok(())
8981
}

0 commit comments

Comments
 (0)