-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_ssm_rds_patch_automation_document.yaml
265 lines (232 loc) · 11.4 KB
/
create_ssm_rds_patch_automation_document.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# CloudFormation template header: format version and a human-readable
# description of the stack.
AWSTemplateFormatVersion: "2010-09-09"
Description: >-
  CloudFormation template for RDS PostgreSQL Fleet Upgrade Automation Document
Resources:
  # IAM role passed to the Automation document below as its default
  # `AssumeIAMRole` (see the `!GetAtt AutomationServiceRole.Arn` default).
  AutomationServiceRole:
    Type: AWS::IAM::Role
    Properties:
      # NOTE(review): a fixed RoleName means this stack needs
      # CAPABILITY_NAMED_IAM and can exist only once per account - confirm
      # this is intended before deploying to several regions.
      RoleName: RDSPostgreSQL-fleet-upgrade-automation-role
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: ssm.amazonaws.com
            Action: sts:AssumeRole
          # NOTE(review): nothing in this template attaches the role to an
          # EC2 instance profile, so the ec2.amazonaws.com trust looks
          # broader than needed. Kept for backward compatibility - confirm
          # whether it can be removed.
          - Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
            Action:
              - sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonSSMAutomationRole

  # Automation runbook with two aws:runCommand steps executed on one EC2
  # instance:
  #   1. DownloadAndSaveScript - copies rds_psql_patch.sh from S3.
  #   2. UpgradeInstances      - finds tagged PostgreSQL RDS instances and
  #                              runs the script for each one in parallel.
  RDSPostgreSQLFleetUpgrade:
    Type: AWS::SSM::Document
    Properties:
      Name: RDS-PostgreSQL-Fleet-Upgrade
      DocumentType: Automation
      Content:
        schemaVersion: '0.3'
        description: Upgrade fleet of RDS PostgreSQL instances
        parameters:
          ScriptsDirOnEC2:
            type: String
            description: The directory where scripts will be stored
            default: /tmp/rds_patch
          AssumeIAMRole:
            type: String
            description: The ARN of the role that allows Automation to perform the actions on your behalf.
            default: !GetAtt AutomationServiceRole.Arn
          EC2InstanceId:
            type: String
            description: The ID of the existing EC2 Instance
          RunPreUpgradeTasks:
            type: String
            description: Whether to run pre-upgrade tasks (PREUPGRADE) or upgrade tasks (UPGRADE)
            default: PREUPGRADE
            allowedValues:
              - PREUPGRADE
              - UPGRADE
          SnsTopicArnEmail:
            type: String
            description: SNS Topic ARN for email notifications
            default: ''
          # The default is empty, but the download step needs a real bucket
          # name and aborts with an explicit error when it is left empty.
          S3BucketName:
            type: String
            description: Name of the S3 bucket containing the shell script and related log files can be stored
            default: ''
          S3ScriptKey:
            type: String
            description: The S3 key (path) to the shell script
            default: rds_psql_patch.sh
          DBTagValue:
            type: String
            description: The tag value to identify RDS PostgreSQL instances
            # Quoted so YAML 1.1 parsers do not read it as boolean true.
            default: 'Y'
          DBTagKey:
            type: String
            description: The tag key to identify RDS PostgreSQL instances
            default: UpgradeDB
          TargetEngineVersion:
            type: String
            description: Target Engine Version (Sample value 15.8)
        assumeRole: '{{ AssumeIAMRole }}'
        mainSteps:
          # Step 1: fetch the patch script from S3 onto the EC2 instance.
          - name: DownloadAndSaveScript
            action: aws:runCommand
            nextStep: UpgradeInstances
            isEnd: false
            onFailure: Abort
            inputs:
              DocumentName: AWS-RunShellScript
              InstanceIds:
                - '{{ EC2InstanceId }}'
              Parameters:
                commands:
                  - |
                    #!/bin/bash
                    set -euo pipefail
                    # e: exits on error
                    # u: exits on undefined variables
                    # o pipefail: exits if any command in a pipe fails

                    # Setup environment
                    export AWS_REGION="{{ global:REGION }}"
                    export AWS_DEFAULT_REGION="{{ global:REGION }}"
                    export RunPreUpgradeTasks="{{ RunPreUpgradeTasks }}"

                    # Capture document parameters once so every later use is quoted.
                    SCRIPTS_DIR="{{ ScriptsDirOnEC2 }}"
                    S3_BUCKET="{{ S3BucketName }}"
                    S3_KEY="{{ S3ScriptKey }}"
                    SCRIPT_PATH="${SCRIPTS_DIR}/rds_psql_patch.sh"

                    # The bucket parameter defaults to an empty string; fail with a
                    # clear message instead of an opaque "s3:///" CLI error.
                    if [ -z "${S3_BUCKET}" ]; then
                      echo "ERROR: S3BucketName is empty; cannot download ${S3_KEY}" >&2
                      exit 1
                    fi

                    # Create directory
                    mkdir -p "${SCRIPTS_DIR}"

                    # Download file
                    aws s3 cp "s3://${S3_BUCKET}/${S3_KEY}" "${SCRIPT_PATH}"

                    # Make script executable
                    chmod +x "${SCRIPT_PATH}"

                    # Verify final state
                    echo "Final script permissions:"
                    ls -la "${SCRIPT_PATH}"

          # Step 2: discover tagged instances and run the patch script for
          # each of them in a background subshell, then summarise results.
          - name: UpgradeInstances
            action: aws:runCommand
            isEnd: true
            onFailure: Abort
            inputs:
              DocumentName: AWS-RunShellScript
              InstanceIds:
                - '{{ EC2InstanceId }}'
              Parameters:
                commands:
                  - |
                    #!/bin/bash
                    set -euo pipefail

                    # Setup environment
                    export AWS_REGION="{{ global:REGION }}"
                    export AWS_DEFAULT_REGION="{{ global:REGION }}"
                    export RunPreUpgradeTasks="{{ RunPreUpgradeTasks }}"

                    # The logs directory is shared across runs; the timestamp only
                    # distinguishes the log files written by this run.
                    TIMESTAMP=$(date '+%Y%m%d-%H-%M-%S')
                    LOGS_DIR="{{ ScriptsDirOnEC2 }}/logs"
                    MASTER_LOG="${LOGS_DIR}/${RunPreUpgradeTasks}-master-${TIMESTAMP}.log"
                    mkdir -p "${LOGS_DIR}"

                    # Store instance list in a file to ensure consistency
                    INSTANCE_LIST_FILE="${LOGS_DIR}/${RunPreUpgradeTasks}-instance_list.txt"

                    # log_message <instance> <message> <log_file>
                    # Prints a timestamped line to stdout and appends it to <log_file>.
                    log_message() {
                      local instance="$1"
                      local message="$2"
                      local log_file="$3"
                      # Declared separately so a failing `date` is not masked by `local`.
                      local timestamp
                      timestamp=$(date '+%Y-%m-%d %H:%M:%S')
                      echo "[${timestamp}] [${instance}] ${message}" | tee -a "${log_file}"
                    }

                    export TAG_KEY="{{ DBTagKey }}"
                    export TAG_VALUE="{{ DBTagValue }}"

                    # List PostgreSQL instances carrying the requested tag.
                    # `.TagList[]?` tolerates instances without a TagList.
                    aws rds describe-db-instances --filters "Name=engine,Values=postgres" --output json | \
                      jq -r --arg key "$TAG_KEY" --arg value "$TAG_VALUE" \
                        '.DBInstances[] | select(.TagList[]? | select(.Key==$key and .Value==$value)) | .DBInstanceIdentifier' \
                      > "${INSTANCE_LIST_FILE}"

                    INSTANCES=$(cat "${INSTANCE_LIST_FILE}")
                    log_message "MASTER" "Found instances to upgrade: ${INSTANCES}" "${MASTER_LOG}"
                    if [ ! -s "${INSTANCE_LIST_FILE}" ]; then
                      log_message "MASTER" "WARN: No PostgreSQL instances found with tag ${TAG_KEY}=${TAG_VALUE}" "${MASTER_LOG}"
                    fi

                    COUNT=0
                    declare -a PIDS=()

                    # Kill every background worker when the command is interrupted.
                    # The ${arr[@]+...} form keeps `set -u` from aborting on an empty
                    # array with bash older than 4.4.
                    cleanup() {
                      log_message "MASTER" "Cleaning up background processes..." "${MASTER_LOG}"
                      for pid in ${PIDS[@]+"${PIDS[@]}"}; do
                        kill -9 "$pid" 2>/dev/null || true
                      done
                      exit 1
                    }
                    trap cleanup SIGINT SIGTERM

                    export SCRIPTS_DIR="{{ ScriptsDirOnEC2 }}"
                    export S3_BUCKET_PATCH_LOGS="{{ S3BucketName }}"
                    export SNS_TOPIC_ARN_EMAIL="{{ SnsTopicArnEmail }}"

                    # Launch one background worker per instance.
                    while IFS= read -r INSTANCE || [[ -n "$INSTANCE" ]]; do
                      INSTANCE_LOG_DIR="${LOGS_DIR}/${INSTANCE}"
                      mkdir -p "${INSTANCE_LOG_DIR}"
                      INSTANCE_LOG="${INSTANCE_LOG_DIR}/${RunPreUpgradeTasks}-${TIMESTAMP}.log"
                      STATUS_FILE="${INSTANCE_LOG_DIR}/${RunPreUpgradeTasks}-status"

                      # The status file name is not timestamped; drop any result left by
                      # a previous run so it cannot be mistaken for this run's outcome.
                      rm -f "${STATUS_FILE}"

                      (
                        cd "${SCRIPTS_DIR}"
                        log_message "${INSTANCE}" "Starting upgrade process" "${INSTANCE_LOG}"
                        if ./rds_psql_patch.sh "${INSTANCE}" "{{ TargetEngineVersion }}" "{{ RunPreUpgradeTasks }}" >> "${INSTANCE_LOG}" 2>&1; then
                          log_message "${INSTANCE}" "${RunPreUpgradeTasks} completed." "${INSTANCE_LOG}"
                          echo "SUCCESS" > "${STATUS_FILE}"
                        else
                          log_message "${INSTANCE}" "${RunPreUpgradeTasks} failed" "${INSTANCE_LOG}"
                          echo "FAILED" > "${STATUS_FILE}"
                          exit 1
                        fi
                      ) &
                      WORKER_PID=$!
                      PIDS+=("${WORKER_PID}")
                      COUNT=$((COUNT+1))
                      log_message "MASTER" "Started upgrade process for ${INSTANCE} with PID ${WORKER_PID}" "${MASTER_LOG}"
                    done < "${INSTANCE_LIST_FILE}"

                    # Wait for every worker; any non-zero exit marks the run as failed.
                    EXIT_STATUS=0
                    FAILED_INSTANCES=""
                    for pid in ${PIDS[@]+"${PIDS[@]}"}; do
                      if ! wait "$pid"; then
                        EXIT_STATUS=1
                      fi
                    done
                    log_message "MASTER" "Upgrade initiated for $COUNT instances" "${MASTER_LOG}"

                    # Logging summary for each instance
                    while IFS= read -r INSTANCE || [[ -n "$INSTANCE" ]]; do
                      INSTANCE_LOG_DIR="${LOGS_DIR}/${INSTANCE}"
                      INSTANCE_LOG="${INSTANCE_LOG_DIR}/${RunPreUpgradeTasks}-${TIMESTAMP}.log"
                      STATUS_FILE="${INSTANCE_LOG_DIR}/${RunPreUpgradeTasks}-status"
                      if [ -f "${INSTANCE_LOG}" ]; then
                        log_message "MASTER" "" "${MASTER_LOG}"
                        log_message "MASTER" "===== Log summary for instance: ${INSTANCE} =====" "${MASTER_LOG}"
                        # `|| true` keeps pipefail from aborting when grep matches nothing.
                        while IFS= read -r line; do
                          log_message "MASTER" "$line" "${MASTER_LOG}"
                        done < <(grep -E "INFO|ERROR|IMPORTANT|WARN" "${INSTANCE_LOG}" | grep -E -v -i "vacuuming|pages|scanned|analyzing" || true)
                        log_message "MASTER" "===== End of log summary for instance: ${INSTANCE} =====" "${MASTER_LOG}"
                        log_message "MASTER" "" "${MASTER_LOG}"

                        # Check instance status; a missing status file counts as a failure.
                        if [ -f "${STATUS_FILE}" ]; then
                          STATUS=$(cat "${STATUS_FILE}")
                          if [ "${STATUS}" = "FAILED" ]; then
                            FAILED_INSTANCES="${FAILED_INSTANCES} ${INSTANCE}"
                          fi
                        else
                          FAILED_INSTANCES="${FAILED_INSTANCES} ${INSTANCE}"
                        fi
                      else
                        log_message "MASTER" "" "${MASTER_LOG}"
                        log_message "MASTER" "WARNING: Log file not found for ${INSTANCE}: ${INSTANCE_LOG}" "${MASTER_LOG}"
                        log_message "MASTER" "" "${MASTER_LOG}"
                        FAILED_INSTANCES="${FAILED_INSTANCES} ${INSTANCE}"
                      fi
                    done < "${INSTANCE_LIST_FILE}"

                    if [ "${EXIT_STATUS}" -eq 0 ] && [ -z "${FAILED_INSTANCES}" ]; then
                      log_message "MASTER" "INFO: All ${RunPreUpgradeTasks} tasks completed." "${MASTER_LOG}"
                    else
                      log_message "MASTER" "ERROR: Some ${RunPreUpgradeTasks} tasks failed. Failed instances:${FAILED_INSTANCES}" "${MASTER_LOG}"
                      # Never report success to Automation when a failure was recorded.
                      EXIT_STATUS=1
                    fi

                    # Upload the whole logs directory when a bucket was supplied.
                    if [ -n "{{ S3BucketName }}" ]; then
                      aws s3 cp "${LOGS_DIR}" "s3://{{ S3BucketName }}/logs/$(date '+%Y-%m-%d')/" --recursive
                    fi
                    exit "${EXIT_STATUS}"
# Values exported by the stack once it has been created.
Outputs:
  # ARN of the IAM role declared as AutomationServiceRole.
  SSMDocumentAssumeRoleArn:
    Value: !GetAtt AutomationServiceRole.Arn
    Description: The ARN of the role that allows Automation to perform the actions on your behalf.
  # Reference to the RDSPostgreSQLFleetUpgrade document resource.
  SSMDocumentName:
    Value: !Ref RDSPostgreSQLFleetUpgrade
    Description: Name of the SSM Automation Document