@@ -7,21 +7,21 @@ use crate::{benchmark::Benchmark, datafusion_db::DatafusionDb, tpch::TpchConfig}
7
7
use anyhow:: { self } ;
8
8
use async_trait:: async_trait;
9
9
10
- /// This struct performs cardinality testing across one or more databases .
11
- /// Another design would be for the CardtestRunnerDBHelper trait to expose a function
10
+ /// This struct performs cardinality testing across one or more dbmss .
11
+ /// Another design would be for the CardtestRunnerDBMSHelper trait to expose a function
12
12
/// to evaluate the Q-error. However, I chose not to do this design for reasons
13
- /// described in the comments of the CardtestRunnerDBHelper trait. This is why
14
- /// you would use CardtestRunner even for computing the Q-error of a single database .
13
+ /// described in the comments of the CardtestRunnerDBMSHelper trait. This is why
14
+ /// you would use CardtestRunner even for computing the Q-error of a single dbms .
15
15
pub struct CardtestRunner {
16
- pub databases : Vec < Box < dyn CardtestRunnerDBHelper > > ,
16
+ pub dbmss : Vec < Box < dyn CardtestRunnerDBMSHelper > > ,
17
17
}
18
18
19
19
impl CardtestRunner {
20
- pub async fn new ( databases : Vec < Box < dyn CardtestRunnerDBHelper > > ) -> anyhow:: Result < Self > {
21
- Ok ( CardtestRunner { databases } )
20
+ pub async fn new ( dbmss : Vec < Box < dyn CardtestRunnerDBMSHelper > > ) -> anyhow:: Result < Self > {
21
+ Ok ( CardtestRunner { dbmss } )
22
22
}
23
23
24
- /// Get the Q-error of a query using the cost models of all databases being tested
24
+ /// Get the Q-error of a query using the cost models of all dbmss being tested
25
25
/// Q-error is defined in [Leis 2015](https://15721.courses.cs.cmu.edu/spring2024/papers/16-costmodels/p204-leis.pdf)
26
26
/// One detail not specified in the paper is that Q-error is based on the ratio of true and estimated cardinality
27
27
/// of the entire query, not of a subtree of the query. This detail is specified in Section 7.1 of
@@ -32,16 +32,16 @@ impl CardtestRunner {
32
32
) -> anyhow:: Result < HashMap < String , Vec < f64 > > > {
33
33
let mut qerrors_alldbs = HashMap :: new ( ) ;
34
34
35
- for database in & mut self . databases {
36
- let estcards = database . eval_benchmark_estcards ( benchmark) . await ?;
37
- let truecards = database . eval_benchmark_truecards ( benchmark) . await ?;
35
+ for dbms in & mut self . dbmss {
36
+ let estcards = dbms . eval_benchmark_estcards ( benchmark) . await ?;
37
+ let truecards = dbms . eval_benchmark_truecards ( benchmark) . await ?;
38
38
assert ! ( truecards. len( ) == estcards. len( ) ) ;
39
39
let qerrors = estcards
40
40
. into_iter ( )
41
41
. zip ( truecards. into_iter ( ) )
42
42
. map ( |( estcard, truecard) | CardtestRunner :: calc_qerror ( estcard, truecard) )
43
43
. collect ( ) ;
44
- qerrors_alldbs. insert ( String :: from ( database . get_name ( ) ) , qerrors) ;
44
+ qerrors_alldbs. insert ( String :: from ( dbms . get_name ( ) ) , qerrors) ;
45
45
}
46
46
47
47
Ok ( qerrors_alldbs)
@@ -55,27 +55,27 @@ impl CardtestRunner {
55
55
}
56
56
}
57
57
58
- /// This trait defines helper functions to enable cardinality testing on a database
58
+ /// This trait defines helper functions to enable cardinality testing on a dbms
59
59
/// The reason a "get qerror" function is not exposed is to allow for greater
60
- /// flexibility. If we exposed "get qerror" for each database , we would need to
61
- /// get the true and estimated cardinalities for _each_ database . However, we
62
- /// can now choose to only get the true cardinalities of _one_ database to
60
+ /// flexibility. If we exposed "get qerror" for each dbms , we would need to
61
+ /// get the true and estimated cardinalities for _each_ dbms . However, we
62
+ /// can now choose to only get the true cardinalities of _one_ dbms to
63
63
/// improve performance or even cache the true cardinalities. Additionally, if
64
- /// we do want to get the true cardinalities of all databases , we can compare
64
+ /// we do want to get the true cardinalities of all dbmss , we can compare
65
65
/// them against each other to ensure they're all equal. All these options are
66
66
/// possible when exposing "get true card" and "get est card" instead of a
67
67
/// single "get qerror". If you want to compute the Q-error of a single
68
- /// database , just create a CardtestRunner with a single database as input.
68
+ /// dbms , just create a CardtestRunner with a single dbms as input.
69
69
/// When exposing a "get true card" and "get est card" interface, you could
70
70
/// ostensibly do it on the granularity of a single SQL string or on the
71
71
/// granularity of an entire benchmark. I chose the latter for a simple reason:
72
- /// different databases might have different SQL strings for the same conceptual
73
- /// query (see how qgen in tpch-kit takes in database as an input).
72
+ /// different dbmss might have different SQL strings for the same conceptual
73
+ /// query (see how qgen in tpch-kit takes in dbms as an input).
74
74
/// When more performance tests are implemented, you would probably want to extract
75
- /// get_name() into a generic "Database " trait.
75
+ /// get_name() into a generic "DBMS " trait.
76
76
#[ async_trait]
77
- pub trait CardtestRunnerDBHelper {
78
- // get_name() has &self so that we're able to do Box<dyn CardtestRunnerDBHelper >
77
+ pub trait CardtestRunnerDBMSHelper {
78
+ // get_name() has &self so that we're able to do Box<dyn CardtestRunnerDBMSHelper >
79
79
fn get_name ( & self ) -> & str ;
80
80
81
81
// The order of queries has to be the same between these two functions.
@@ -97,12 +97,12 @@ pub async fn cardtest<P: AsRef<Path> + Clone>(
97
97
) -> anyhow:: Result < HashMap < String , Vec < f64 > > > {
98
98
let pg_db = PostgresDb :: new ( workspace_dpath. clone ( ) , pguser, pgpassword) ;
99
99
let df_db = DatafusionDb :: new ( workspace_dpath) . await ?;
100
- let databases : Vec < Box < dyn CardtestRunnerDBHelper > > = vec ! [ Box :: new( pg_db) , Box :: new( df_db) ] ;
100
+ let dbmss : Vec < Box < dyn CardtestRunnerDBMSHelper > > = vec ! [ Box :: new( pg_db) , Box :: new( df_db) ] ;
101
101
102
102
let tpch_benchmark = Benchmark :: Tpch ( tpch_config. clone ( ) ) ;
103
- let mut cardtest_runner = CardtestRunner :: new ( databases ) . await ?;
104
- let qerrors = cardtest_runner
103
+ let mut cardtest_runner = CardtestRunner :: new ( dbmss ) . await ?;
104
+ let qerrors_alldbs = cardtest_runner
105
105
. eval_benchmark_qerrors_alldbs ( & tpch_benchmark)
106
106
. await ?;
107
- Ok ( qerrors )
107
+ Ok ( qerrors_alldbs )
108
108
}
0 commit comments