-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcheck_delayed_jobs.sh
executable file
·112 lines (91 loc) · 2.49 KB
/
check_delayed_jobs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/sh
# Monitoring check for delayed_job workers and their queue. It prints one
# status line (OK / WARNING / CRITICAL / UNKNOWN) followed by performance
# data, and exits with 0 / 1 / 2 / 3 respectively.
#
# Usage: check_delayed_jobs.sh DATABASE EXPECTED_WORKERS
#   DATABASE          database name handed to psql
#   EXPECTED_WORKERS  number of delayed_job processes that must be running
DATABASE="$1"
EXPECTED_WORKERS="$2"
if [ -z "${DATABASE}" ] || [ -z "${EXPECTED_WORKERS}" ]; then
  echo "CRITICAL - Please provide two parameters : database and expected workers"
  exit 2
fi
# EXPECTED_WORKERS is used in integer tests (-ne, -lt) later in the script.
# With a non-numeric value those tests fail with an error and evaluate to
# false, so the corresponding alerts would silently never fire.
case "${EXPECTED_WORKERS}" in
  *[!0-9]*)
    echo "CRITICAL - Expected workers must be a non-negative integer, got '${EXPECTED_WORKERS}'"
    exit 2
    ;;
esac
# NOTE(review): presumably run from / so that the commands executed through
# "sudo -u postgres" do not start in a directory that user cannot access -
# confirm.
cd / || {
  echo "CRITICAL - Cannot change directory to /"
  exit 2
}
# Number of waiting jobs at which the check turns WARNING / CRITICAL.
WARN_THRESHOLD=1
CRIT_THRESHOLD=3
# Print the status line: the message (all arguments joined by "$*"), then
# " | " and the performance data for workers, running, zombies and waiting.
# Reads:  PID_COUNT, RUNNING, ZOMBIES, WAITING, EXPECTED_WORKERS,
#         WARN_THRESHOLD, CRIT_THRESHOLD
# Writes: PERFDATA (the performance-data part of the line)
output() {
  # Each metric is 'label'=value;warn;crit;min;max - one format string
  # lays out all four metrics at once.
  PERFDATA=$(printf "'workers'=%s;;;0;%s 'running'=%s;;;0;%s 'zombies'=%s;;;0; 'waiting'=%s;%s;%s;0;" \
    "${PID_COUNT}" "${EXPECTED_WORKERS}" \
    "${RUNNING}" "${EXPECTED_WORKERS}" \
    "${ZOMBIES}" \
    "${WAITING}" "${WARN_THRESHOLD}" "${CRIT_THRESHOLD}")
  echo "$* | ${PERFDATA}"
}
# Report an UNKNOWN state: hand "UNKNOWN - <message>" to output() as a
# single argument, then terminate the script with exit status 3.
# Arguments: the message words (joined with "$*").
unknown() {
  set -- "UNKNOWN - $*"
  output "$@"
  exit 3
}
# Report a CRITICAL state: hand "CRITICAL - <message>" to output() as a
# single argument, then terminate the script with exit status 2.
# Arguments: the message words (joined with "$*").
critical() {
  set -- "CRITICAL - $*"
  output "$@"
  exit 2
}
# Report a WARNING state: hand "WARNING - <message>" to output() as a
# single argument, then terminate the script with exit status 1.
# Arguments: the message words (joined with "$*").
warning() {
  set -- "WARNING - $*"
  output "$@"
  exit 1
}
# Report an OK state: hand "OK - <message>" to output() as a single
# argument, then terminate the script with exit status 0.
# Arguments: the message words (joined with "$*").
ok() {
  set -- "OK - $*"
  output "$@"
  exit 0
}
# Succeeds when $1 is a non-empty string made only of decimal digits.
is_count() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# Print the number of delayed_jobs rows matching the SQL condition in $1.
# The query runs through psql as the postgres user against ${DATABASE};
# -A -t make psql print the bare value without alignment or headers.
# Callers must validate the result with is_count: when psql cannot run the
# query the captured output is not a number.
count_jobs() {
  sudo -u postgres psql "${DATABASE}" -A -t <<EOF
select count(1)
from delayed_jobs
where $1
EOF
}

# Get delayed_job PIDs.
# The sed program rewrites each "PID delayed_job.N " entry printed by pgrep
# into the quoted string 'delayed_job.N host:<hostname> pid:PID' and joins
# the lines with ", " so the result can be placed inside a SQL "in (...)".
# NOTE(review): presumably this is the format the workers store in
# delayed_jobs.locked_by - confirm.
PID_ARRAY=$(pgrep -fa delayed_job -u mapotempo | sed -re ":a;s/([0-9]+)\\s+(delayed_job\\.[0-9]+)\\s+/'\\2 host:$(hostname) pid:\\1'/;N;s/\\n/, /;ta")
PID_COUNT=$(pgrep -f delayed_job -u mapotempo | wc -l)

# Get count of waiting jobs: neither locked by a worker nor failed.
WAITING=$(count_jobs "locked_by is null and failed_at is null")

if [ "${PID_COUNT}" -gt 0 ]; then
  # Running jobs: locked by one of the live workers and not failed.
  RUNNING=$(count_jobs "locked_by is not null and failed_at is null and locked_by in (${PID_ARRAY})")
  # Zombie jobs: locked by something that is not a live worker.
  ZOMBIES=$(count_jobs "locked_by is not null and locked_by not in (${PID_ARRAY})")
else
  # No worker process at all: nothing can be running and every lock is stale.
  RUNNING=0
  ZOMBIES=$(count_jobs "locked_by is not null")
fi

# A failed query leaves its variable empty (or non-numeric). Without this
# guard every integer test below would error out, evaluate to false, and the
# script would end up reporting OK while the database is unreachable.
# Unreadable values are replaced by "U" so the performance data stays
# well-formed.
UNREADABLE=""
if ! is_count "${WAITING}"; then
  WAITING=U
  UNREADABLE="${UNREADABLE} waiting"
fi
if ! is_count "${RUNNING}"; then
  RUNNING=U
  UNREADABLE="${UNREADABLE} running"
fi
if ! is_count "${ZOMBIES}"; then
  ZOMBIES=U
  UNREADABLE="${UNREADABLE} zombies"
fi

# The worker count does not depend on the database, so it is still checked
# first and a mismatch stays CRITICAL even when the queries failed.
if [ "${PID_COUNT}" -ne "${EXPECTED_WORKERS}" ]; then
  critical "Found ${PID_COUNT} workers but ${EXPECTED_WORKERS} are expected."
fi

if [ -n "${UNREADABLE}" ]; then
  unknown "Could not read job counts from database '${DATABASE}' (failed:${UNREADABLE})."
fi

if [ "${WAITING}" -ge "${CRIT_THRESHOLD}" ]; then
  critical "Found ${WAITING} jobs waiting."
fi

# Jobs are queuing up although not every worker is busy.
# NOTE(review): the comparison uses the literal 1, not WARN_THRESHOLD -
# confirm this is intended.
if [ "${WAITING}" -gt 1 ]; then
  if [ "${RUNNING}" -lt "${EXPECTED_WORKERS}" ]; then
    critical "Found ${WAITING} waiting jobs and only ${RUNNING} running jobs (expected ${EXPECTED_WORKERS})."
  fi
fi

if [ "${WAITING}" -ge "${WARN_THRESHOLD}" ]; then
  warning "Found ${WAITING} waiting jobs."
fi

if [ "${ZOMBIES}" -gt 0 ]; then
  warning "Found ${ZOMBIES} zombie jobs."
fi

# ok prints the status line and exits 0, so nothing runs after it.
ok "Found ${RUNNING} running jobs over ${PID_COUNT} workers and ${WAITING} waiting jobs."