|
# Replication lag gauge. The query yields 0 when pg_is_in_recovery() is false; otherwise it
# yields the epoch seconds between now() and pg_last_xact_replay_timestamp(), with
# GREATEST(0, ...) keeping the value from going negative.
# NOTE(review): `master: true` presumably limits this query to the primary connection - confirm.
| 1 | +pg_replication: |
| 2 | + query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag" |
| 3 | + master: true |
| 4 | + metrics: |
| 5 | + - lag: |
| 6 | + usage: "GAUGE" |
| 7 | + description: "Replication lag behind master in seconds" |
| 8 | + |
# Exposes the value returned by pg_postmaster_start_time() as the gauge start_time_seconds.
# NOTE(review): the query selects the raw function result rather than an epoch number;
# presumably the exporter converts it to seconds - confirm.
| 9 | +pg_postmaster: |
| 10 | + query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()" |
| 11 | + master: true |
| 12 | + metrics: |
| 13 | + - start_time_seconds: |
| 14 | + usage: "GAUGE" |
| 15 | + description: "Time at which postmaster started" |
| 16 | + |
# Per-table activity and maintenance statistics read from the pg_stat_user_tables view.
# Each row is labelled with current_database() (as datname), schemaname and relname.
# The four last_* timestamps are wrapped in COALESCE(..., '1970-01-01Z') so a table that has
# never been vacuumed/analyzed reports the epoch instead of NULL.
| 17 | +pg_stat_user_tables: |
| 18 | + query: | |
| 19 | + SELECT |
| 20 | + current_database() datname, |
| 21 | + schemaname, |
| 22 | + relname, |
| 23 | + seq_scan, |
| 24 | + seq_tup_read, |
| 25 | + idx_scan, |
| 26 | + idx_tup_fetch, |
| 27 | + n_tup_ins, |
| 28 | + n_tup_upd, |
| 29 | + n_tup_del, |
| 30 | + n_tup_hot_upd, |
| 31 | + n_live_tup, |
| 32 | + n_dead_tup, |
| 33 | + n_mod_since_analyze, |
| 34 | + COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, |
| 35 | + COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, |
| 36 | + COALESCE(last_analyze, '1970-01-01Z') as last_analyze, |
| 37 | + COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, |
| 38 | + vacuum_count, |
| 39 | + autovacuum_count, |
| 40 | + analyze_count, |
| 41 | + autoanalyze_count |
| 42 | + FROM |
| 43 | + pg_stat_user_tables |
| 44 | + metrics: |
| 45 | + - datname: |
| 46 | + usage: "LABEL" |
| 47 | + description: "Name of current database" |
| 48 | + - schemaname: |
| 49 | + usage: "LABEL" |
| 50 | + description: "Name of the schema that this table is in" |
| 51 | + - relname: |
| 52 | + usage: "LABEL" |
| 53 | + description: "Name of this table" |
| 54 | + - seq_scan: |
| 55 | + usage: "COUNTER" |
| 56 | + description: "Number of sequential scans initiated on this table" |
| 57 | + - seq_tup_read: |
| 58 | + usage: "COUNTER" |
| 59 | + description: "Number of live rows fetched by sequential scans" |
| 60 | + - idx_scan: |
| 61 | + usage: "COUNTER" |
| 62 | + description: "Number of index scans initiated on this table" |
| 63 | + - idx_tup_fetch: |
| 64 | + usage: "COUNTER" |
| 65 | + description: "Number of live rows fetched by index scans" |
| 66 | + - n_tup_ins: |
| 67 | + usage: "COUNTER" |
| 68 | + description: "Number of rows inserted" |
| 69 | + - n_tup_upd: |
| 70 | + usage: "COUNTER" |
| 71 | + description: "Number of rows updated" |
| 72 | + - n_tup_del: |
| 73 | + usage: "COUNTER" |
| 74 | + description: "Number of rows deleted" |
| 75 | + - n_tup_hot_upd: |
| 76 | + usage: "COUNTER" |
| 77 | + description: "Number of rows HOT updated (i.e., with no separate index update required)" |
| 78 | + - n_live_tup: |
| 79 | + usage: "GAUGE" |
| 80 | + description: "Estimated number of live rows" |
| 81 | + - n_dead_tup: |
| 82 | + usage: "GAUGE" |
| 83 | + description: "Estimated number of dead rows" |
| 84 | + - n_mod_since_analyze: |
| 85 | + usage: "GAUGE" |
| 86 | + description: "Estimated number of rows changed since last analyze" |
| 87 | + - last_vacuum: |
| 88 | + usage: "GAUGE" |
| 89 | + description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)" |
| 90 | + - last_autovacuum: |
| 91 | + usage: "GAUGE" |
| 92 | + description: "Last time at which this table was vacuumed by the autovacuum daemon" |
| 93 | + - last_analyze: |
| 94 | + usage: "GAUGE" |
| 95 | + description: "Last time at which this table was manually analyzed" |
| 96 | + - last_autoanalyze: |
| 97 | + usage: "GAUGE" |
| 98 | + description: "Last time at which this table was analyzed by the autovacuum daemon" |
| 99 | + - vacuum_count: |
| 100 | + usage: "COUNTER" |
| 101 | + description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" |
| 102 | + - autovacuum_count: |
| 103 | + usage: "COUNTER" |
| 104 | + description: "Number of times this table has been vacuumed by the autovacuum daemon" |
| 105 | + - analyze_count: |
| 106 | + usage: "COUNTER" |
| 107 | + description: "Number of times this table has been manually analyzed" |
| 108 | + - autoanalyze_count: |
| 109 | + usage: "COUNTER" |
| 110 | + description: "Number of times this table has been analyzed by the autovacuum daemon" |
| 111 | + |
# Per-table block I/O counters read from the pg_statio_user_tables view: block reads and
# buffer hits for the heap, its indexes, its TOAST table and the TOAST table's indexes.
# Each row is labelled with current_database() (as datname), schemaname and relname.
| 112 | +pg_statio_user_tables: |
| 113 | + query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables" |
| 114 | + metrics: |
| 115 | + - datname: |
| 116 | + usage: "LABEL" |
| 117 | + description: "Name of current database" |
| 118 | + - schemaname: |
| 119 | + usage: "LABEL" |
| 120 | + description: "Name of the schema that this table is in" |
| 121 | + - relname: |
| 122 | + usage: "LABEL" |
| 123 | + description: "Name of this table" |
| 124 | + - heap_blks_read: |
| 125 | + usage: "COUNTER" |
| 126 | + description: "Number of disk blocks read from this table" |
| 127 | + - heap_blks_hit: |
| 128 | + usage: "COUNTER" |
| 129 | + description: "Number of buffer hits in this table" |
| 130 | + - idx_blks_read: |
| 131 | + usage: "COUNTER" |
| 132 | + description: "Number of disk blocks read from all indexes on this table" |
| 133 | + - idx_blks_hit: |
| 134 | + usage: "COUNTER" |
| 135 | + description: "Number of buffer hits in all indexes on this table" |
| 136 | + - toast_blks_read: |
| 137 | + usage: "COUNTER" |
| 138 | + description: "Number of disk blocks read from this table's TOAST table (if any)" |
| 139 | + - toast_blks_hit: |
| 140 | + usage: "COUNTER" |
| 141 | + description: "Number of buffer hits in this table's TOAST table (if any)" |
| 142 | + - tidx_blks_read: |
| 143 | + usage: "COUNTER" |
| 144 | + description: "Number of disk blocks read from this table's TOAST table indexes (if any)" |
| 145 | + - tidx_blks_hit: |
| 146 | + usage: "COUNTER" |
| 147 | + description: "Number of buffer hits in this table's TOAST table indexes (if any)" |
| 148 | + |
| 149 | +# WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series |
# Per-statement execution statistics from the pg_stat_statements view, joined to pg_roles and
# pg_database so each series is labelled with role name, database name and queryid.
# Every *_time column is divided by 1000 in the query and exported under a *_seconds name,
# so the descriptions below state seconds. Rows whose role is 'rdsadmin' are excluded.
| 150 | +pg_stat_statements: |
| 151 | + query: "SELECT t2.rolname, t3.datname, queryid, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin'" |
| 152 | + master: true |
| 153 | + metrics: |
| 154 | + - rolname: |
| 155 | + usage: "LABEL" |
| 156 | + description: "Name of user" |
| 157 | + - datname: |
| 158 | + usage: "LABEL" |
| 159 | + description: "Name of database" |
| 160 | + - queryid: |
| 161 | + usage: "LABEL" |
| 162 | + description: "Query ID" |
| 163 | + - calls: |
| 164 | + usage: "COUNTER" |
| 165 | + description: "Number of times executed" |
| 166 | + - total_time_seconds: |
| 167 | + usage: "COUNTER" |
| 168 | + description: "Total time spent in the statement, in seconds" |
| 169 | + - min_time_seconds: |
| 170 | + usage: "GAUGE" |
| 171 | + description: "Minimum time spent in the statement, in seconds" |
| 172 | + - max_time_seconds: |
| 173 | + usage: "GAUGE" |
| 174 | + description: "Maximum time spent in the statement, in seconds" |
| 175 | + - mean_time_seconds: |
| 176 | + usage: "GAUGE" |
| 177 | + description: "Mean time spent in the statement, in seconds" |
| 178 | + - stddev_time_seconds: |
| 179 | + usage: "GAUGE" |
| 180 | + description: "Population standard deviation of time spent in the statement, in seconds" |
| 181 | + - rows: |
| 182 | + usage: "COUNTER" |
| 183 | + description: "Total number of rows retrieved or affected by the statement" |
| 184 | + - shared_blks_hit: |
| 185 | + usage: "COUNTER" |
| 186 | + description: "Total number of shared block cache hits by the statement" |
| 187 | + - shared_blks_read: |
| 188 | + usage: "COUNTER" |
| 189 | + description: "Total number of shared blocks read by the statement" |
| 190 | + - shared_blks_dirtied: |
| 191 | + usage: "COUNTER" |
| 192 | + description: "Total number of shared blocks dirtied by the statement" |
| 193 | + - shared_blks_written: |
| 194 | + usage: "COUNTER" |
| 195 | + description: "Total number of shared blocks written by the statement" |
| 196 | + - local_blks_hit: |
| 197 | + usage: "COUNTER" |
| 198 | + description: "Total number of local block cache hits by the statement" |
| 199 | + - local_blks_read: |
| 200 | + usage: "COUNTER" |
| 201 | + description: "Total number of local blocks read by the statement" |
| 202 | + - local_blks_dirtied: |
| 203 | + usage: "COUNTER" |
| 204 | + description: "Total number of local blocks dirtied by the statement" |
| 205 | + - local_blks_written: |
| 206 | + usage: "COUNTER" |
| 207 | + description: "Total number of local blocks written by the statement" |
| 208 | + - temp_blks_read: |
| 209 | + usage: "COUNTER" |
| 210 | + description: "Total number of temp blocks read by the statement" |
| 211 | + - temp_blks_written: |
| 212 | + usage: "COUNTER" |
| 213 | + description: "Total number of temp blocks written by the statement" |
| 214 | + - blk_read_time_seconds: |
| 215 | + usage: "COUNTER" |
| 216 | + description: "Total time the statement spent reading blocks, in seconds (if track_io_timing is enabled, otherwise zero)" |
| 217 | + - blk_write_time_seconds: |
| 218 | + usage: "COUNTER" |
| 219 | + description: "Total time the statement spent writing blocks, in seconds (if track_io_timing is enabled, otherwise zero)" |
| 220 | + |
# Histogram of how long idle sessions have been idle, grouped by application_name.
# The query returns one row per application with seconds_sum / seconds_count (total idle
# seconds and number of idle sessions) plus two parallel arrays: seconds (the bucket upper
# bounds) and seconds_bucket (cumulative count of sessions idle for <= each bound).
# Both CTEs filter on state = 'idle' so a bucket count can never exceed seconds_count, and
# both arrays are aggregated ORDER BY le so bounds and counts come out in ascending order.
| 221 | +pg_process_idle: |
| 222 | + query: | |
| 223 | + WITH |
| 224 | + metrics AS ( |
| 225 | + SELECT |
| 226 | + application_name, |
| 227 | + SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum, |
| 228 | + COUNT(*) AS process_idle_seconds_count |
| 229 | + FROM pg_stat_activity |
| 230 | + WHERE state = 'idle' |
| 231 | + GROUP BY application_name |
| 232 | + ), |
| 233 | + buckets AS ( |
| 234 | + SELECT |
| 235 | + application_name, |
| 236 | + le, |
| 237 | + SUM( |
| 238 | + CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le |
| 239 | + THEN 1 |
| 240 | + ELSE 0 |
| 241 | + END |
| 242 | + )::bigint AS bucket |
| 243 | + FROM |
| 244 | + pg_stat_activity |
| 245 | + CROSS JOIN UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le |
| 246 | + WHERE state = 'idle' |
| 247 | + GROUP BY application_name, le |
| 248 | + ) |
| 249 | + SELECT |
| 250 | + application_name, |
| 251 | + process_idle_seconds_sum as seconds_sum, |
| 252 | + process_idle_seconds_count as seconds_count, |
| 253 | + ARRAY_AGG(le ORDER BY le) AS seconds, |
| 254 | + ARRAY_AGG(bucket ORDER BY le) AS seconds_bucket |
| 255 | + FROM metrics JOIN buckets USING (application_name) |
| 256 | + GROUP BY application_name, process_idle_seconds_sum, process_idle_seconds_count |
| 257 | + metrics: |
| 258 | + - application_name: |
| 259 | + usage: "LABEL" |
| 260 | + description: "Application Name" |
| 261 | + - seconds: |
| 262 | + usage: "HISTOGRAM" |
| 263 | + description: "Idle time of server processes" |
0 commit comments