Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit b6f1768

Browse files
feat: custom "workspaces" for generated data (#109)
**Summary**: Instead of generating data (queries, schema, etc.) directly inside the optd repo, we generate it inside a custom "workspace" directory.

**Demo**: https://github.com/cmu-db/optd/assets/20631215/56931dce-6ac0-499f-a968-dde34f103f70

**Details**:
* The main current use case is running tests locally with a "clean" workspace, without needing to delete local files.
* The workspace is specified via a CLI arg.
* Relative paths are allowed and are interpreted relative to the root directory of the optd repository using `parse_pathstr()`.
1 parent aae6046 commit b6f1768

File tree

5 files changed

+59
-42
lines changed

5 files changed

+59
-42
lines changed

.gitignore

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,4 @@
22
/.vscode
33
/.DS_Store
44
/.idea
5-
.history
6-
optd-perftest/**/genned_tables
7-
optd-perftest/**/genned_queries
8-
optd-perftest/**/tpch-kit
9-
optd-perftest/**/pgdata
10-
optd-perftest/**/postgres_log
5+
.history

optd-perftest/src/main.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::{fs, path::Path};
2+
13
use cardtest::{CardtestRunner, CardtestRunnerDBHelper};
24
use clap::{Parser, Subcommand};
35
use postgres_db::PostgresDb;
@@ -16,6 +18,12 @@ mod tpch;
1618

1719
#[derive(Parser)]
1820
struct Cli {
21+
#[arg(long)]
22+
#[clap(default_value = "../optd_perftest_workspace")]
23+
#[clap(
24+
help = "The directory where artifacts required for performance testing (such as pgdata or TPC-H queries) are generated. See comment of parse_pathstr() to see what paths are allowed (TLDR: absolute and relative both ok)."
25+
)]
26+
workspace: String,
1927
#[command(subcommand)]
2028
command: Commands,
2129
}
@@ -37,20 +45,28 @@ async fn main() -> anyhow::Result<()> {
3745
env_logger::init();
3846
let cli = Cli::parse();
3947

48+
let workspace_dpath = shell::parse_pathstr(&cli.workspace)?;
49+
if !workspace_dpath.exists() {
50+
fs::create_dir(&workspace_dpath)?;
51+
}
52+
4053
match &cli.command {
4154
Commands::Cardtest { scale_factor, seed } => {
4255
let tpch_config = TpchConfig {
4356
database: String::from(TPCH_KIT_POSTGRES),
4457
scale_factor: *scale_factor,
4558
seed: *seed,
4659
};
47-
cardtest(tpch_config).await
60+
cardtest(&workspace_dpath, tpch_config).await
4861
}
4962
}
5063
}
5164

52-
async fn cardtest(tpch_config: TpchConfig) -> anyhow::Result<()> {
53-
let pg_db = PostgresDb::build().await?;
65+
async fn cardtest<P: AsRef<Path>>(
66+
workspace_dpath: P,
67+
tpch_config: TpchConfig,
68+
) -> anyhow::Result<()> {
69+
let pg_db = PostgresDb::build(workspace_dpath).await?;
5470
let databases: Vec<Box<dyn CardtestRunnerDBHelper>> = vec![Box::new(pg_db)];
5571

5672
let tpch_benchmark = Benchmark::Tpch(tpch_config.clone());

optd-perftest/src/postgres_db.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::{
77
use async_trait::async_trait;
88
use regex::Regex;
99
use std::{
10-
env::{self, consts::OS},
10+
env::consts::OS,
1111
fs::{self, File},
1212
path::{Path, PathBuf},
1313
process::Command,
@@ -17,6 +17,7 @@ use tokio_postgres::{Client, NoTls};
1717
const OPTD_DBNAME: &str = "optd";
1818

1919
pub struct PostgresDb {
20+
workspace_dpath: PathBuf,
2021
// is an option because we need to initialize the struct before setting this
2122
client: Option<Client>,
2223

@@ -32,17 +33,12 @@ pub struct PostgresDb {
3233
/// - Stop and start functions should be separate
3334
/// - Setup should be done in build() unless it requires more information (like benchmark)
3435
impl PostgresDb {
35-
pub async fn build() -> anyhow::Result<Self> {
36+
pub async fn build<P: AsRef<Path>>(workspace_dpath: P) -> anyhow::Result<Self> {
3637
log::debug!("[start] building PostgresDb");
3738

3839
// build paths, sometimes creating them if they don't exist
39-
let curr_dpath = env::current_dir()?;
40-
let postgres_db_dpath = Path::new(file!())
41-
.parent()
42-
.unwrap()
43-
.join("postgres_db")
44-
.to_path_buf();
45-
let postgres_db_dpath = curr_dpath.join(postgres_db_dpath); // make it absolute
40+
let workspace_dpath = workspace_dpath.as_ref().to_path_buf();
41+
let postgres_db_dpath = workspace_dpath.join("postgres_db");
4642
if !postgres_db_dpath.exists() {
4743
fs::create_dir(&postgres_db_dpath)?;
4844
}
@@ -51,6 +47,7 @@ impl PostgresDb {
5147

5248
// create Self
5349
let mut db = PostgresDb {
50+
workspace_dpath,
5451
client: None,
5552
_postgres_db_dpath: postgres_db_dpath,
5653
pgdata_dpath,
@@ -222,7 +219,7 @@ impl PostgresDb {
222219
// deleting pgdata would also delete the old connection so we have to reconnect
223220
self.connect_to_postgres().await?;
224221
// load the schema
225-
let tpch_kit = TpchKit::build()?;
222+
let tpch_kit = TpchKit::build(&self.workspace_dpath)?;
226223
shell::run_command_with_status_check(&format!(
227224
"psql {} -f {}",
228225
OPTD_DBNAME,
@@ -279,7 +276,7 @@ impl CardtestRunnerDBHelper for PostgresDb {
279276
/// This impl has helpers for ```impl CardtestRunnerDBHelper for PostgresDb```
280277
impl PostgresDb {
281278
async fn eval_tpch_estcards(&self, tpch_config: &TpchConfig) -> anyhow::Result<Vec<usize>> {
282-
let tpch_kit = TpchKit::build()?;
279+
let tpch_kit = TpchKit::build(&self.workspace_dpath)?;
283280
tpch_kit.gen_queries(tpch_config)?;
284281

285282
let mut estcards = vec![];
@@ -293,7 +290,7 @@ impl PostgresDb {
293290
}
294291

295292
async fn eval_tpch_truecards(&self, tpch_config: &TpchConfig) -> anyhow::Result<Vec<usize>> {
296-
let tpch_kit = TpchKit::build()?;
293+
let tpch_kit = TpchKit::build(&self.workspace_dpath)?;
297294
tpch_kit.gen_queries(tpch_config)?;
298295

299296
let mut truecards = vec![];

optd-perftest/src/shell.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
use std::path::Path;
1+
use std::path::{Path, PathBuf};
22
use std::process::{Command, Output};
3+
use std::str;
34
use std::{fs, io};
45

56
/// Runs a command, exiting the program immediately if the command fails
@@ -37,3 +38,22 @@ where
3738
}
3839
Ok(())
3940
}
41+
42+
/// Get the path of the root "optd" repo directory
43+
pub fn get_optd_root() -> io::Result<PathBuf> {
44+
let output = run_command_with_status_check("git rev-parse --show-toplevel")?;
45+
let path = str::from_utf8(&output.stdout).unwrap().trim();
46+
let path = PathBuf::from(path);
47+
Ok(path)
48+
}
49+
50+
/// Can be an absolute path or a relative path. Regardless of where this CLI is run, relative paths are evaluated relative to the optd repo root.
51+
pub fn parse_pathstr(pathstr: &str) -> io::Result<PathBuf> {
52+
let path = PathBuf::from(pathstr);
53+
let path = if path.is_relative() {
54+
get_optd_root()?.join(path)
55+
} else {
56+
path
57+
};
58+
Ok(path)
59+
}

optd-perftest/src/tpch.rs

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ impl TpchConfig {
3030
/// Since it's conceptually a wrapper around the repo, I chose _not_ to make
3131
/// TpchConfig an initialization parameter.
3232
pub struct TpchKit {
33+
_workspace_dpath: PathBuf,
34+
3335
// cache these paths so we don't have to build them multiple times
34-
optd_repo_dpath: PathBuf,
3536
_tpch_dpath: PathBuf,
3637
tpch_kit_repo_dpath: PathBuf,
3738
queries_dpath: PathBuf,
@@ -43,20 +44,14 @@ pub struct TpchKit {
4344

4445
/// I keep the same conventions for these methods as I do for PostgresDb
4546
impl TpchKit {
46-
pub fn build() -> io::Result<Self> {
47+
pub fn build<P: AsRef<Path>>(workspace_dpath: P) -> io::Result<Self> {
4748
log::debug!("[start] building TpchKit");
4849

4950
// build paths, sometimes creating them if they don't exist
50-
// we assume that this is being run in the base optd repo dir
51-
let optd_repo_dpath = env::current_dir()?;
52-
let tpch_dpath = Path::new(file!())
53-
.parent()
54-
.unwrap()
55-
.join("tpch")
56-
.to_path_buf();
57-
let tpch_dpath = optd_repo_dpath.join(tpch_dpath); // make tpch_dpath absolute
51+
let workspace_dpath = workspace_dpath.as_ref().to_path_buf();
52+
let tpch_dpath = workspace_dpath.join("tpch");
5853
if !tpch_dpath.exists() {
59-
fs::create_dir(&tpch_dpath).unwrap_or_else(|_| panic!("tpch_dpath ({:?}) doesn't exist. make sure that the current dir is the base repo dir. right now, the current dir is {:?}", tpch_dpath, optd_repo_dpath));
54+
fs::create_dir(&tpch_dpath)?;
6055
}
6156
let tpch_kit_repo_dpath = tpch_dpath.join("tpch-kit");
6257
let dbgen_dpath = tpch_kit_repo_dpath.join("dbgen");
@@ -73,7 +68,7 @@ impl TpchKit {
7368

7469
// create Self
7570
let kit = TpchKit {
76-
optd_repo_dpath,
71+
_workspace_dpath: workspace_dpath,
7772
_tpch_dpath: tpch_dpath,
7873
tpch_kit_repo_dpath,
7974
queries_dpath,
@@ -94,10 +89,6 @@ impl TpchKit {
9489
Ok(kit)
9590
}
9691

97-
fn cd_to_optd(&self) -> io::Result<()> {
98-
env::set_current_dir(&self.optd_repo_dpath)
99-
}
100-
10192
fn clonepull_tpch_kit_repo(&self) -> io::Result<()> {
10293
if !self.tpch_kit_repo_dpath.exists() {
10394
log::debug!("[start] cloning tpch-kit repo");
@@ -114,7 +105,7 @@ impl TpchKit {
114105
log::debug!("[start] pulling latest tpch-kit repo");
115106
shell::run_command_with_status_check("git pull")?;
116107
log::debug!("[end] pulling latest tpch-kit repo");
117-
self.cd_to_optd()
108+
Ok(())
118109
}
119110

120111
fn build_dbgen(&self, database: &str) -> io::Result<()> {
@@ -126,7 +117,7 @@ impl TpchKit {
126117
database
127118
))?;
128119
log::debug!("[end] building dbgen");
129-
self.cd_to_optd()
120+
Ok(())
130121
}
131122

132123
fn get_machine() -> &'static str {
@@ -155,7 +146,6 @@ impl TpchKit {
155146
"./dbgen -s{}",
156147
tpch_config.scale_factor
157148
))?;
158-
self.cd_to_optd()?;
159149
File::create(done_fpath)?;
160150
log::debug!("[end] generating tables for {}", tpch_config.get_stringid());
161151
} else {
@@ -189,7 +179,6 @@ impl TpchKit {
189179
this_genned_queries_dpath.join(format!("{}.sql", query_i));
190180
fs::write(&this_genned_queries_fpath, output.stdout)?;
191181
}
192-
self.cd_to_optd()?;
193182
File::create(done_fpath)?;
194183
log::debug!(
195184
"[end] generating queries for {}",

0 commit comments

Comments
 (0)