
Commit 16f26ba

K8SPSMDB-1080 - Use trap to catch exit status

1 parent 31091a0
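
This commit replaces dozens of per-check collect_k8s_logs calls, scattered across the e2e test scripts, with a single shell trap installed in e2e-tests/functions: diagnostics are now gathered once, whenever a test script exits non-zero. A minimal standalone sketch of the pattern, with collect_k8s_logs stubbed out (in the suite it dumps cluster state and operator logs):

#!/bin/bash
# Sketch of the trap-based cleanup this commit adds to e2e-tests/functions.
# collect_k8s_logs is a stub here; the real helper gathers pod and operator logs.
collect_k8s_logs() { echo "collecting k8s logs..."; }

cleanup() {
    exit_code=$?                 # status of the command that triggered the exit
    if [[ ${exit_code} -ne 0 ]]; then
        collect_k8s_logs         # collect diagnostics only on failure
    fi
    exit ${exit_code}            # re-raise so the caller still sees the real status
}
trap cleanup EXIT HUP INT QUIT TERM

echo "running test steps..."
exit 1 # any failing check now triggers log collection exactly once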

File tree (27 files changed: +14, −81 lines)

  • Jenkinsfile
  • e2e-tests
    • arbiter
    • balancer
    • cross-site-sharded
    • data-at-rest-encryption
    • data-sharded
    • default-cr
    • demand-backup
    • demand-backup-physical
    • demand-backup-physical-sharded
    • demand-backup-sharded
    • expose-sharded
    • functions
    • init-deploy
    • mongod-major-upgrade
    • mongod-major-upgrade-sharded
    • monitoring-2-0
    • multi-cluster-service
    • rs-shard-migration
    • self-healing-chaos
    • service-per-pod
    • smart-update
    • split-horizon
    • tls-issue-cert-manager
    • upgrade
    • upgrade-sharded
    • version-service


Jenkinsfile

Lines changed: 1 addition & 1 deletion

@@ -505,7 +505,7 @@ EOF
 
     unstash 'IMAGE'
     def IMAGE = sh(returnStdout: true, script: "cat results/docker/TAG").trim()
-    TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\n"
+    TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\nlogs: s3://percona-jenkins-artifactory/cloud-psmdb-operator/PR-${env.CHANGE_ID}/${env.GIT_COMMIT}/logs/"
     pullRequest.comment(TestsReport)
  }
 }
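
The added logs line points reviewers at the per-commit test artifacts. Assuming read access to the percona-jenkins-artifactory bucket, the uploaded logs could be listed with the AWS CLI along these lines (the PR number and commit SHA below are placeholders, not values from this commit):

# Hypothetical PR number and commit SHA, shown only to illustrate the path layout.
aws s3 ls s3://percona-jenkins-artifactory/cloud-psmdb-operator/PR-1234/0123abcd/logs/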

e2e-tests/arbiter/run

Lines changed: 0 additions & 1 deletion

@@ -31,7 +31,6 @@ check_cr_config() {
     if [[ $(kubectl_bin get pod \
         --selector=statefulset.kubernetes.io/pod-name="${cluster}-arbiter-0" \
         -o jsonpath='{.items[*].status.containerStatuses[?(@.name == "mongod-arbiter")].restartCount}') -gt 0 ]]; then
-        collect_k8s_logs
         echo "Something went wrong with arbiter. Exiting..."
         exit 1
     fi

e2e-tests/balancer/run

Lines changed: 0 additions & 1 deletion

@@ -15,7 +15,6 @@ check_balancer() {
         | grep -E -v "Percona Server for MongoDB|connecting to:|Implicit session:|versions do not match|Error saving history file:|bye")
 
     if [[ $balancer_running != "$expected" ]]; then
-        collect_k8s_logs
         echo "Unexpected output from \"db.adminCommand({balancerStatus: 1}).mode\": $balancer_running"
         echo "Expected $expected"
         exit 1

e2e-tests/cross-site-sharded/run

Lines changed: 0 additions & 1 deletion

@@ -101,7 +101,6 @@ for i in "rs0" "rs1"; do
 done
 
 if [[ $shards -lt 2 ]]; then
-    collect_k8s_logs
     echo "data is only on some of the shards, maybe sharding is not working"
     exit 1
 fi

e2e-tests/data-at-rest-encryption/run

Lines changed: 0 additions & 2 deletions

@@ -83,7 +83,6 @@ encrypted_cluster_log=$(kubectl_bin logs some-name-rs0-0 -c mongod -n $namespace
 
 echo "$encrypted_cluster_log"
 if [ -z "$encrypted_cluster_log" ]; then
-    collect_k8s_logs
     echo "Cluster is not encrypted"
     exit 1
 fi

@@ -100,7 +99,6 @@ until [ "$retry" -ge 10 ]; do
         echo "Cluster is not encrypted already"
         break
     elif [ $retry == 15 ]; then
-        collect_k8s_logs
         echo "Max retry count $retry reached. Cluster is still encrypted"
         exit 1
     else

e2e-tests/data-sharded/run

Lines changed: 0 additions & 3 deletions

@@ -17,7 +17,6 @@ check_rs_proper_component_deletion() {
     until [[ $(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') -eq 0 ]]; do
         let retry+=1
         if [ $retry -ge 70 ]; then
-            collect_k8s_logs
             sts_count=$(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length')
             echo "Replset $rs_name not properly removed, expected sts count of 0 but got $sts_count. Exiting after $retry tries..."
             exit 1

@@ -116,7 +115,6 @@ main() {
     done
 
     if [[ $shards -lt 3 ]]; then
-        collect_k8s_logs
         echo "data is only on some of the shards, maybe sharding is not working"
         exit 1
     fi

@@ -127,7 +125,6 @@ main() {
         "clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
         "--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls")
     if ! echo $res | grep -q '"ok" : 1'; then
-        collect_k8s_logs
         echo "app database not dropped. Exiting.."
         exit 1
     fi

e2e-tests/default-cr/run

Lines changed: 0 additions & 1 deletion

@@ -27,7 +27,6 @@ function stop_cluster() {
         let passed_time="${passed_time}+${sleep_time}"
         sleep ${sleep_time}
         if [[ ${passed_time} -gt ${max_wait_time} ]]; then
-            collect_k8s_logs
             echo "We've been waiting for cluster stop for too long. Exiting..."
             exit 1
         fi

e2e-tests/demand-backup-physical-sharded/run

Lines changed: 0 additions & 3 deletions

@@ -38,7 +38,6 @@ run_recovery_check() {
     wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800"
     kubectl_bin get psmdb ${cluster} -o yaml
     if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then
-        collect_k8s_logs
         echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore"
         exit 1
     fi

@@ -53,7 +52,6 @@ check_exported_mongos_service_endpoint() {
     local host=$1
 
     if [ "$host" != "$(kubectl_bin get psmdb $cluster -o=jsonpath='{.status.host}')" ]; then
-        collect_k8s_logs
         echo "Exported host is not correct after the restore"
         exit 1
     fi

@@ -82,7 +80,6 @@ wait_cluster_consistency ${cluster}
 lbEndpoint=$(kubectl_bin get svc $cluster-mongos -o=jsonpath='{.status}' |
     jq -r 'select(.loadBalancer != null and .loadBalancer.ingress != null and .loadBalancer.ingress != []) | .loadBalancer.ingress[0][]')
 if [ -z $lbEndpoint ]; then
-    collect_k8s_logs
     echo "mongos service not exported correctly"
     exit 1
 fi

e2e-tests/demand-backup-physical/run

Lines changed: 0 additions & 1 deletion

@@ -38,7 +38,6 @@ run_recovery_check() {
     wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800"
     kubectl_bin get psmdb ${cluster} -o yaml
     if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then
-        collect_k8s_logs
         echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore"
         exit 1
     fi

e2e-tests/demand-backup-sharded/run

Lines changed: 0 additions & 1 deletion

@@ -166,7 +166,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
     /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
     | grep -c ${backup_dest_minio}_ | cat)
 if [[ $backup_exists -eq 1 ]]; then
-    collect_k8s_logs
     echo "Backup was not removed from bucket -- minio"
     exit 1
 fi

e2e-tests/demand-backup/run

Lines changed: 0 additions & 2 deletions

@@ -135,7 +135,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
     /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
     | grep -c ${backup_dest_minio} | cat)
 if [[ $backup_exists -eq 1 ]]; then
-    collect_k8s_logs
     echo "Backup was not removed from bucket -- minio"
     exit 1
 fi

@@ -171,7 +170,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
     /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
     | grep -c ${backup_dest_minio} | cat)
 if [[ $backup_exists -eq 1 ]]; then
-    collect_k8s_logs
     echo "Backup was not removed from bucket -- minio"
     exit 1
 fi

e2e-tests/expose-sharded/run

Lines changed: 0 additions & 2 deletions

@@ -23,7 +23,6 @@ function stop_cluster() {
         let passed_time="${passed_time}+${sleep_time}"
         sleep ${passed_time}
         if [[ ${passed_time} -gt ${max_wait_time} ]]; then
-            collect_k8s_logs
             echo "We've been waiting for cluster stop for too long. Exiting..."
             exit 1
         fi

@@ -53,7 +52,6 @@ function compare_mongo_config() {
     rs0_0_endpoint_actual=$(run_mongo 'var host;var x=0;rs.conf().members.forEach(function(d){ if(d.tags.podName=="some-name-rs0-0"){ host=rs.conf().members[x].host;print(host)};x=x+1; })' "clusterAdmin:clusterAdmin123456@${cluster}-rs0.${namespace}" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye')
 
     if [[ $rs0_0_endpoint_actual != "$rs0_0_endpoint:27017" || $cfg_0_endpoint_actual != "$cfg_0_endpoint:27017" ]]; then
-        collect_k8s_logs
         desc "Actual values rs $rs0_0_endpoint_actual and cfg $cfg_0_endpoint_actual do not match expected rs $rs0_0_endpoint:27017 and cfg $cfg_0_endpoint:27017"
         exit 1
     fi

e2e-tests/functions

Lines changed: 9 additions & 25 deletions

@@ -28,6 +28,15 @@ conf_dir=$(realpath $test_dir/../conf || :)
 src_dir=$(realpath $test_dir/../..)
 logs_dir=$(realpath $test_dir/../logs)
 
+trap cleanup EXIT HUP INT QUIT TERM
+cleanup() {
+    exit_code=$?
+    if [[ ${exit_code} -ne 0 ]]; then
+        collect_k8s_logs
+    fi
+    exit ${exit_code}
+}
+
 if [[ ${ENABLE_LOGGING} == "true" ]]; then
     if [ ! -d "${logs_dir}" ]; then
         mkdir "${logs_dir}"

@@ -150,7 +159,6 @@ wait_pod() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin describe pod/$pod
             kubectl_bin logs $pod
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \

@@ -159,7 +167,6 @@ wait_pod() {
                 | grep -v 'Getting tasks for pod' \
                 | grep -v 'Getting pods from source' \
                 | tail -100
-
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             exit 1
         fi

@@ -179,14 +186,12 @@ wait_cron() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \
                 | grep -v 'Getting tasks for pod' \
                 | grep -v 'Getting pods from source' \
                 | tail -100
-
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             exit 1
         fi

@@ -205,10 +210,8 @@ wait_backup_agent() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin logs $agent_pod -c backup-agent \
                 | tail -100
-
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             exit 1
         fi

@@ -230,14 +233,12 @@ wait_backup() {
         let retry+=1
         current_status=$(kubectl_bin get psmdb-backup $backup_name -o jsonpath='{.status.state}')
         if [[ $retry -ge 360 || ${current_status} == 'error' ]]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \
                 | grep -v 'Getting tasks for pod' \
                 | grep -v 'Getting pods from source' \
                 | tail -100
-
             echo "Backup object psmdb-backup/${backup_name} is in ${current_state} state."
             echo something went wrong with operator or kubernetes cluster
             exit 1

@@ -291,14 +292,12 @@ wait_deployment() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \
                 | grep -v 'Getting tasks for pod' \
                 | grep -v 'Getting pods from source' \
                 | tail -100
-
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             exit 1
         fi

@@ -339,7 +338,6 @@ wait_restore() {
         let retry+=1
         current_state=$(kubectl_bin get psmdb-restore restore-$backup_name -o jsonpath='{.status.state}')
         if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \

@@ -553,7 +551,6 @@ retry() {
 
     until "$@"; do
         if [[ $n -ge $max ]]; then
-            collect_k8s_logs
             echo "The command '$@' has failed after $n attempts."
             exit 1
         fi

@@ -593,7 +590,6 @@ wait_for_running() {
         timeout=$((timeout + 1))
         echo -n '.'
         if [[ ${timeout} -gt 1500 ]]; then
-            collect_k8s_logs
             echo
             echo "Waiting timeout has been reached. Exiting..."
             exit 1

@@ -616,14 +612,12 @@ wait_for_delete() {
         echo -n .
         let retry+=1
         if [ $retry -ge $wait_time ]; then
-            collect_k8s_logs
             kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \
                 | grep -v 'Getting tasks for pod' \
                 | grep -v 'Getting pods from source' \
                 | tail -100
-
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             exit 1
         fi

@@ -639,8 +633,6 @@ compare_generation() {
 
     current_generation="$(kubectl_bin get ${resource_type} "${resource_name}" -o jsonpath='{.metadata.generation}')"
     if [[ ${generation} != "${current_generation}" ]]; then
-        collect_k8s_logs
-
         echo "Generation for ${resource_type}/${resource_name} is: ${current_generation}, but should be: ${generation}"
         exit 1
     fi

@@ -1011,7 +1003,6 @@ get_service_endpoint() {
         return
     fi
 
-    collect_k8s_logs
     exit 1
 }
 

@@ -1150,9 +1141,6 @@ kubectl_bin() {
     cat "$LAST_OUT"
     cat "$LAST_ERR" >&2
     rm "$LAST_OUT" "$LAST_ERR"
-    if [ ${exit_status} != 0 ]; then
-        collect_k8s_logs
-    fi
     return ${exit_status}
 }
 

@@ -1191,7 +1179,6 @@ wait_cluster_consistency() {
     until [[ "$(kubectl_bin get psmdb "${cluster_name}" -o jsonpath='{.status.state}')" == "ready" ]]; do
         let retry+=1
         if [ $retry -ge $wait_time ]; then
-            collect_k8s_logs
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             exit 1
         fi

@@ -1218,7 +1205,6 @@ check_backup_deletion() {
     retry=0
     until [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 403 ]] || [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 404 ]]; do
         if [ $retry -ge 10 ]; then
-            collect_k8s_logs
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             echo "Backup was not removed from bucket -- $storage_name"
             exit 1

@@ -1280,7 +1266,6 @@ function get_mongod_ver_from_image() {
     version_info=$(run_simple_cli_inside_image ${image} 'mongod --version' | $sed -r 's/^.*db version v(([0-9]+\.){2}[0-9]+-[0-9]+).*$/\1/g')
 
     if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+-[0-9]+$ ]]; then
-        collect_k8s_logs
         printf "No mongod version obtained from %s. Exiting" ${image}
         exit 1
     fi

@@ -1293,7 +1278,6 @@ function get_pbm_version() {
     local version_info=$(run_simple_cli_inside_image ${image} 'pbm-agent version' | $sed -r 's/^Version:\ (([0-9]+\.){2}[0-9]+)\ .*/\1/g')
 
     if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+$ ]]; then
-        collect_k8s_logs
         printf "No pbm version obtained from %s. Exiting" ${image}
         exit 1
     fi
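
Two details of the added handler are worth noting. In an EXIT trap, $? holds the script's final exit status, and bash looks up the trap's function name when the trap fires, so installing the trap before cleanup is defined is safe. The explicit exit ${exit_code} re-raises that status so an interrupted or failed run still reports failure to the CI harness. A quick illustrative check of the first point:

bash -c 'trap cleanup EXIT; cleanup() { echo "trap saw status: $?"; }; exit 3'; echo "script exited with: $?"
# prints: trap saw status: 3
#         script exited with: 3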

e2e-tests/init-deploy/run

Lines changed: 0 additions & 1 deletion

@@ -61,7 +61,6 @@ compare_mongo_cmd "find" "myApp:myPass@$cluster-2.$cluster.$namespace"
 desc 'check number of connections'
 conn_count=$(run_mongo 'db.serverStatus().connections.current' "clusterAdmin:clusterAdmin123456@$cluster.$namespace" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|bye')
 if [ ${conn_count} -gt ${max_conn} ]; then
-    collect_k8s_logs
     echo "Mongo connection count ${conn_count} is greater than maximum connection count limit: ${max_conn}"
     exit 1
 fi

e2e-tests/mongod-major-upgrade-sharded/run

Lines changed: 0 additions & 1 deletion

@@ -94,7 +94,6 @@ function main() {
         | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version')
 
     if [[ ${currentFCV} != ${version} ]]; then
-        collect_k8s_logs
         echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..."
         exit 1
     fi

e2e-tests/mongod-major-upgrade/run

Lines changed: 0 additions & 1 deletion

@@ -89,7 +89,6 @@ function main() {
         | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version')
 
     if [[ ${currentFCV} != ${version} ]]; then
-        collect_k8s_logs
         echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..."
         exit 1
     fi

e2e-tests/monitoring-2-0/run

Lines changed: 0 additions & 2 deletions

@@ -37,7 +37,6 @@ until kubectl_bin exec monitoring-0 -- bash -c "ls -l /proc/*/exe 2>/dev/null| g
     sleep 5
     let retry+=1
     if [ $retry -ge 20 ]; then
-        collect_k8s_logs
         echo "Max retry count $retry reached. Pmm-server can't start"
         exit 1
     fi

@@ -151,7 +150,6 @@ if [[ -n ${OPENSHIFT} ]]; then
 fi
 
 if [[ $(kubectl_bin logs monitoring-rs0-0 pmm-client | grep -c 'cannot auto discover databases and collections') != 0 ]]; then
-    collect_k8s_logs
     echo "error: cannot auto discover databases and collections"
     exit 1
 fi
