/*
 * This work ports PGIO (https://github.com/therealkevinc/pgio)
 * to run from PL/pgSQL only. The idea follows Kevin Closson's SLOB method
 * (https://kevinclosson.net/slob/): focus the workload on the one component
 * we want to benchmark. For example, to measure the read path down to disk
 * (PGIO - Physical IO) we run a workload that reads random rows from a
 * working set larger than the available memory, and avoid any other
 * work (network calls, query parsing, reading through a large document,...).
 *
 * Two procedures are created here:
 * - setup() creates tables of the required size
 * - runit() runs the benchmark for the specified duration
 * and one table:
 * - benchruns stores the run results
 */
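/*
 * Example usage (a minimal sketch; the sizes and duration are illustrative,
 * not recommendations - for physical-IO runs the table must be larger than
 * the available memory, per the SLOB idea above):
 *   call setup(tab_rows=>10000000, batch_size=>10000);
 *   call runit(tab_rows=>10000000, run_duration=>interval '5 minutes');
 */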
drop procedure if exists setup;
/*
 * SETUP
 * Each session may read its own table, or the same table (when what we want
 * to test is concurrent access), so the table name is built from a prefix and
 * a number. The default creates bench0001. We insert in several batches up to
 * the total number of rows. Rows have a "mykey" bigint column, indexed, which
 * is the one we will query. The "scratch" column is random, to be sure to
 * scatter rows within the table. The "filler" column just adds some bytes.
 */
create or replace procedure setup(
-- table name is built from prefix + number
tab_prefix text default 'bench',
tab_num int default 1,
-- number of table rows to insert
tab_rows bigint default 1e6,
-- rows are inserted in batches of batch_size rows (tab_rows should be a multiple of batch_size)
batch_size bigint default 1000,
-- split into tablets (0 means the default)
tab_tablets int default 0,
ind_tablets int default 0,
-- by default, for clustered tables, we don't create a PK, but for heap tables we can.
index_as_pk boolean default false,
-- filler characters (not very useful here)
filler int default 1,
-- drops the table to recreate it
recreate boolean default true,
-- scatter primary keys by ordering on scratch
orderby text default 'scratch'
) language plpgsql as
$setup$
declare
clock_start timestamp;
ind_split_clause text:='';
begin
-- there's a flag to drop the existing tables
if recreate then execute format('drop table if exists %I',tab_prefix||to_char(tab_num,'fm0000')); end if;
-- create the table
execute format('create table if not exists %I (mykey bigint, scratch bigint, filler char(%s)) %s',tab_prefix||to_char(tab_num,'fm0000'),filler,case tab_tablets when 0 then '' else format('split into %s tablets',tab_tablets)end);
-- index the table on mykey (could be done afterwards but I like homogeneous work)
-- build the SPLIT AT clause for CREATE INDEX
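-- e.g. tab_rows=1000000 and ind_tablets=4 builds: split at values((250000),(500000),(750000))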
if ind_tablets>1 then
ind_split_clause:=' split at values(';
for i in 1..ind_tablets-1 loop
if i>1 then ind_split_clause:=format('%s%s',ind_split_clause,','); end if;
ind_split_clause:=format('%s(%s)',ind_split_clause,ceil(i*tab_rows/ind_tablets));
end loop;
ind_split_clause:=format('%s)',ind_split_clause);
end if;
if index_as_pk then
execute format('alter table %I add constraint %I_pk_mykey primary key (mykey)%s',tab_prefix||to_char(tab_num,'fm0000'),tab_prefix||to_char(tab_num,'fm0000'),ind_split_clause);
else
execute format('create index if not exists %I_asc_mykey on %I(mykey asc)%s',tab_prefix||to_char(tab_num,'fm0000'),tab_prefix||to_char(tab_num,'fm0000'),ind_split_clause);
end if;
-- insert rows in several passes
raise notice 'Inserting % rows in % batches of %',tab_rows,ceil(tab_rows/batch_size),batch_size;
clock_start= clock_timestamp();
for i in 1..ceil(tab_rows/batch_size) loop
-- generate numbers and shuffle them with the random scratch
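-- (each batch scatters its keys across the whole range: batch i inserts
--  keys i+b, i+2b, ..., i+batch_size*b where b is the number of batches)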
execute format('insert into %I
select * from (select generate_series::bigint*%s+%s mykey, (random()*%s)::bigint as scratch , lpad(%L,%s,md5(random()::text)) filler
from generate_series(1,%s) ) as generated %s'
,tab_prefix||to_char(tab_num,'fm0000'),ceil(tab_rows/batch_size),i,tab_rows,'',filler,batch_size,coalesce('order by '||orderby,''));
-- output a message for each loop
raise notice 'Table % Progress: % % (% rows) at % rows/s',tab_prefix||to_char(tab_num,'fm0000'),to_char((100*(i::float)/ceil(tab_rows/batch_size)),'999.99'),'%',to_char(i*batch_size,'99999999999'),to_char((i*batch_size)/extract(epoch from clock_timestamp()-clock_start),'999999');
-- intermediate commit for each batch
commit;
end loop;
END; $setup$;
/* the results of the runs will be stored in a "benchruns" table
* where the most interesting will be:
* num_rows/extract(epoch from end_time-start_time)
 * the number of rows read per second
*/
drop table if exists benchruns;
create table benchruns(
 job_id serial, start_time timestamp, end_time timestamp,
 num_batches int, num_rows bigint, pct_update int, max_scratch bigint,
 prepared boolean, index_only boolean, tab_rows int, batch_size int,
 table_name text, table_rows bigint, table_scratch bigint, comments text,
 primary key(job_id)
);
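/*
 * Example: reads per second for each run (a sketch over the columns above):
 *   select job_id, table_name,
 *          num_rows/extract(epoch from end_time-start_time) as rows_per_second
 *   from benchruns order by job_id;
 */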
/*
 * RUNIT
 * This will be called by one session, specifying the table it works on with "tab_num"
 * and the number of rows: it reads random rows between 1 and "tab_rows"
 * (so "tab_rows" must be equal to or lower than the value used to create the table).
 * In order to focus on reading rows, we range scan an index on "mykey", where we know
 * that rows are scattered across the whole table, to get "batch_size" rows for each
 * execution of the select. A large "batch_size" avoids spending time in layers other
 * than reading rows.
 */
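/*
 * Example variants (sketches; the values are illustrative):
 *   call runit(index_only=>true);  -- range scan the index only
 *   call runit(pct_update=>10);    -- mixed workload, 10% of rows updated
 *   call runit(prepared=>false);   -- re-parse the statement on each execute
 */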
drop procedure if exists runit;
create or replace procedure runit(
-- table name is built from prefix + number
tab_prefix text default 'bench',
tab_num int default 1,
-- random reads will be done on ids in [1..tab_rows] (the table must have enough rows)
tab_rows bigint default 1e6,
-- each execute will read batch_size random rows in the range scan
batch_size bigint default 1e4,
-- the job stops after run_duration
run_duration interval default interval '1 minute',
-- percent of updates
pct_update int default 0,
-- prepared statements by default (see https://dev.to/aws-heroes/postgresql-prepared-statements-in-pl-pgsql-jl3)
prepared boolean default true,
-- doesn't read more columns than the indexed one if index_only
index_only boolean default false,
-- starts by counting the rows (and verifies tab_rows)
initial_count boolean default false,
comments text default null
) language plpgsql as
$runit$ <<this>>
declare
clock_start timestamp;
clock_end timestamp;
job_id int:=null;
num_rows float:=0;
num_updated float:=0;
num_batches int:=0;
out_count int;
out_scratch bigint;
max_scratch bigint:=0;
sql_select text;
sql_update text;
first_key int;
begin
-- the batch size cannot be larger than the number of rows (tab_rows should actually be a multiple of batch_size)
if batch_size > tab_rows then batch_size:=tab_rows; end if;
-- we can try an index_only access path if we want to range scan the index only
if index_only then
sql_select:='select count(*),max(mykey) from %I where mykey between $1 and $2';
sql_update:='with u as (update %I set mykey=mykey where mykey between $1 and $2 returning 1,mykey) select count(*),max(mykey) from u';
else
sql_select:='select count(*),max(scratch) from %I where mykey between $1 and $2';
sql_update:='with u as (update %I set scratch=scratch+1 where mykey between $1 and $2 returning 1,scratch) select count(*),max(scratch) from u';
end if;
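-- e.g. with the defaults, format() later resolves sql_select to:
--   select count(*),max(scratch) from bench0001 where mykey between $1 and $2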
-- start: count rows in the table (if flag for this - can take time)
if initial_count then
execute format('select count(*),max(scratch) from %I',tab_prefix||to_char(tab_num,'fm0000')) into strict out_count,out_scratch;
if out_count < tab_rows then raise exception 'Cannot read % rows from a % rows table',tab_rows,out_count; end if;
end if;
if prepared then
-- deallocate all prepared statements in case the previous run failed
deallocate all;
end if;
-- insert info about this run
insert into benchruns(start_time, prepared,index_only,tab_rows,pct_update,batch_size,table_name,table_rows,table_scratch,comments)
values (clock_timestamp(),runit.prepared,runit.index_only,runit.tab_rows,runit.pct_update,runit.batch_size,tab_prefix||to_char(tab_num,'fm0000'),out_count,out_scratch,runit.comments)
returning benchruns.job_id into this.job_id;
commit;
if prepared then
execute 'prepare myselect(integer,integer) as '||format(sql_select,tab_prefix||to_char(tab_num,'fm0000'));
execute 'prepare myupdate(integer,integer) as '||format(sql_update,tab_prefix||to_char(tab_num,'fm0000'));
end if;
clock_start= clock_timestamp();
clock_end := clock_start + run_duration ;
loop
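-- pick a random start so that [first_key, first_key+batch_size-1] stays within [1..tab_rows]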
first_key:=trunc(random()*((tab_rows-batch_size)-1)+1);
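-- decide update vs select: update while the fraction updated so far (counting
-- half of this batch in advance) is still below the pct_update target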
if (pct_update=100) or (100*(num_updated+0.5*batch_size)/(num_rows+batch_size)<pct_update) then
-- UPDATE:
if prepared then
execute format('execute myupdate(%s,%s)',first_key,first_key+batch_size-1) into strict out_count,out_scratch;
else
execute format(sql_update,tab_prefix||to_char(tab_num,'fm0000')) into out_count,out_scratch using first_key,first_key+batch_size-1;
end if;
num_updated:=num_updated+out_count;
else
-- SELECT:
if prepared then
execute format('execute myselect(%s,%s)',first_key,first_key+batch_size-1) into strict out_count,out_scratch;
else
execute format(sql_select,tab_prefix||to_char(tab_num,'fm0000')) into out_count,out_scratch using first_key,first_key+batch_size-1;
end if;
end if;
num_batches=num_batches+1;
num_rows=num_rows+out_count;
if out_scratch>max_scratch then max_scratch=out_scratch; end if;
if num_rows>0 then
raise notice '% rows/s on %, job: % batch#: %, total: % rows read, % % updated, last: % rows between % and %'
,to_char(round(num_rows/extract(epoch from clock_timestamp()-clock_start)),'999999') -- RIOPS from start
,tab_prefix||to_char(tab_num,'fm0000') -- table name
,to_char(job_id,'99999') -- job number
,to_char(num_batches,'9999999') -- number of iterations from start
,to_char(num_rows,'99999999999') -- total number of rows read
,to_char(100*num_updated/num_rows,'999D9'),'%' -- percentage updated
,(out_count) -- number of rows read
,(first_key) -- the between range start
,(first_key+batch_size-1) -- the between range end
;
else
raise notice 'No rows found where mykey between % and % --> tab_rows should match the one used at setup()',first_key,first_key+batch_size-1;
end if;
-- intermediate commit for each batch
commit;
exit when clock_timestamp() >= clock_end;
end loop;
if prepared then
deallocate myselect;
deallocate myupdate;
end if;
update benchruns
set start_time=this.clock_start
, end_time=clock_timestamp()
, num_rows=this.num_rows
, num_batches=this.num_batches
, max_scratch=this.max_scratch
where benchruns.job_id=this.job_id;
commit;
END; $runit$;