@@ -2,6 +2,7 @@ use crate::{
2
2
benchmark:: Benchmark ,
3
3
cardtest:: CardtestRunnerDBMSHelper ,
4
4
tpch:: { TpchConfig , TpchKit } ,
5
+ truecard_cache:: DBMSTruecardCache ,
5
6
} ;
6
7
use async_trait:: async_trait;
7
8
use futures:: Sink ;
@@ -17,27 +18,39 @@ use tokio::fs::File;
17
18
use tokio:: io:: AsyncReadExt ;
18
19
use tokio_postgres:: { Client , NoTls , Row } ;
19
20
21
+ /// The name of the Postgres DBMS (as opposed to the DataFusion DBMS for instance)
22
+ pub const POSTGRES_DBMS_NAME : & str = "Postgres" ;
23
+
20
24
/// This dbname is assumed to always exist
21
25
const DEFAULT_DBNAME : & str = "postgres" ;
22
26
23
- pub struct PostgresDb {
27
+ pub struct PostgresDBMS {
24
28
workspace_dpath : PathBuf ,
25
29
pguser : String ,
26
30
pgpassword : String ,
31
+ truecard_cache : DBMSTruecardCache ,
27
32
}
28
33
29
34
/// Conventions I keep for the methods of this struct:
30
35
/// - Functions should be idempotent. For instance, start_postgres() should not fail if Postgres is already running
31
36
/// - For instance, this is why "createdb" is _not_ a function
32
37
/// - Stop and start functions should be separate
33
38
/// - Setup should be done in build() unless it requires more information (like benchmark)
34
- impl PostgresDb {
35
- pub fn new < P : AsRef < Path > > ( workspace_dpath : P , pguser : & str , pgpassword : & str ) -> Self {
36
- Self {
37
- workspace_dpath : PathBuf :: from ( workspace_dpath. as_ref ( ) ) ,
39
+ impl PostgresDBMS {
40
/// Builds a `PostgresDBMS` for the given workspace directory and credentials.
///
/// The workspace path is copied into an owned `PathBuf`, and a
/// `DBMSTruecardCache` is built from that path and `POSTGRES_DBMS_NAME`.
///
/// # Errors
///
/// Returns an error if `DBMSTruecardCache::build` fails.
+ pub fn build < P : AsRef < Path > > (
40
+ workspace_dpath : P ,
41
+ pguser : & str ,
42
+ pgpassword : & str ,
43
+ ) -> anyhow:: Result < Self > {
44
// Own the path so it can be both borrowed by the cache builder and stored in the struct.
+ let workspace_dpath = PathBuf :: from ( workspace_dpath. as_ref ( ) ) ;
45
// Built before `Self` is constructed because it borrows `workspace_dpath`, which is moved below.
+ let truecard_cache = DBMSTruecardCache :: build ( & workspace_dpath, POSTGRES_DBMS_NAME ) ?;
46
+ let pg_dbms = Self {
47
+ workspace_dpath,
48
38
49
pguser : String :: from ( pguser) ,
39
50
pgpassword : String :: from ( pgpassword) ,
40
- }
51
+ truecard_cache,
52
+ } ;
53
+ Ok ( pg_dbms)
41
54
}
42
55
43
56
/// Create a connection to a Postgres database
@@ -145,6 +158,11 @@ impl PostgresDb {
145
158
Self :: copy_from_stdin ( client, tbl_fpath) . await ?;
146
159
}
147
160
161
+ // create stats
162
+ // Run VACUUM FULL ANALYZE, not just ANALYZE, to make sure the stats are created in a deterministic way.
163
+ // this is standard practice for postgres benchmarking
164
+ client. query ( "VACUUM FULL ANALYZE" , & [ ] ) . await ?;
165
+
148
166
Ok ( ( ) )
149
167
}
150
168
@@ -178,9 +196,9 @@ impl PostgresDb {
178
196
}
179
197
180
198
#[ async_trait]
181
- impl CardtestRunnerDBMSHelper for PostgresDb {
199
+ impl CardtestRunnerDBMSHelper for PostgresDBMS {
182
200
/// Returns the name of this DBMS, i.e. the shared `POSTGRES_DBMS_NAME` constant ("Postgres").
fn get_name ( & self ) -> & str {
183
- "Postgres"
201
// Same constant that is passed to `DBMSTruecardCache::build` in the constructor.
+ POSTGRES_DBMS_NAME
184
202
}
185
203
186
204
async fn eval_benchmark_estcards (
@@ -205,13 +223,16 @@ impl CardtestRunnerDBMSHelper for PostgresDb {
205
223
let client = self . connect_to_db ( & dbname) . await ?;
206
224
match benchmark {
207
225
Benchmark :: Test => unimplemented ! ( ) ,
208
- Benchmark :: Tpch ( tpch_config) => self . eval_tpch_truecards ( & client, tpch_config) . await ,
226
+ Benchmark :: Tpch ( tpch_config) => {
227
+ self . eval_tpch_truecards ( & client, tpch_config, & dbname)
228
+ . await
229
+ }
209
230
}
210
231
}
211
232
}
212
233
213
- /// This impl has helpers for ```impl CardtestRunnerDBMSHelper for PostgresDb ```
214
- impl PostgresDb {
234
+ /// This impl has helpers for `impl CardtestRunnerDBMSHelper for PostgresDBMS`.
235
+ impl PostgresDBMS {
215
236
async fn eval_tpch_estcards (
216
237
& self ,
217
238
client : & Client ,
@@ -231,17 +252,25 @@ impl PostgresDb {
231
252
}
232
253
233
254
async fn eval_tpch_truecards (
234
- & self ,
255
+ & mut self ,
235
256
client : & Client ,
236
257
tpch_config : & TpchConfig ,
258
+ dbname : & str , // used by truecard_cache
237
259
) -> anyhow:: Result < Vec < usize > > {
238
260
let tpch_kit = TpchKit :: build ( & self . workspace_dpath ) ?;
239
261
tpch_kit. gen_queries ( tpch_config) ?;
240
262
241
263
let mut truecards = vec ! [ ] ;
242
264
for sql_fpath in tpch_kit. get_sql_fpath_ordered_iter ( tpch_config) ? {
243
265
let sql = fs:: read_to_string ( sql_fpath) ?;
244
- let truecard = self . eval_query_truecard ( client, & sql) . await ?;
266
+ let truecard = match self . truecard_cache . get_truecard ( dbname, & sql) {
267
+ Some ( truecard) => truecard,
268
+ None => {
269
+ let truecard = self . eval_query_truecard ( client, & sql) . await ?;
270
+ self . truecard_cache . insert_truecard ( dbname, & sql, truecard) ;
271
+ truecard
272
+ }
273
+ } ;
245
274
truecards. push ( truecard) ;
246
275
}
247
276
@@ -259,7 +288,7 @@ impl PostgresDb {
259
288
self . log_explain ( & explain_rows) ;
260
289
// the first line contains the explain of the root node
261
290
let first_explain_line: & str = explain_rows. first ( ) . unwrap ( ) . get ( 0 ) ;
262
- let estcard = PostgresDb :: extract_row_count ( first_explain_line) . unwrap ( ) ;
291
+ let estcard = PostgresDBMS :: extract_row_count ( first_explain_line) . unwrap ( ) ;
263
292
Ok ( estcard)
264
293
}
265
294
0 commit comments