Skip to content

Commit 251a106

Browse files
committed
add incident probes
Signed-off-by: Sylvain Hellegouarch <[email protected]>
1 parent c00a6d0 commit 251a106

File tree

5 files changed

+278
-31
lines changed

5 files changed

+278
-31
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
[Unreleased]: https://github.com/chaostoolkit-incubator/chaostoolkit-aws/compare/0.25.0...HEAD
66

7+
### Added
8+
9+
- Probes to work with the incident center
10+
711
## [0.25.0][] - 2023-10-27
812

913
[0.25.0]: https://github.com/chaostoolkit-incubator/chaostoolkit-aws/compare/0.24.0...0.25.0

chaosaws/__init__.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
2-
from typing import Any, Dict, List
2+
from datetime import datetime, timedelta, timezone
3+
from typing import Any, Dict, List, Optional, Union
34

45
import boto3
56
import requests
@@ -260,4 +261,30 @@ def load_exported_activities() -> List[DiscoveredActivities]:
260261
activities.extend(discover_actions("chaosaws.s3.actions"))
261262
activities.extend(discover_activities("chaosaws.s3.controls.upload", "control"))
262263
activities.extend(discover_probes("chaosaws.xray.probes"))
264+
activities.extend(discover_probes("chaosaws.incidents.probes"))
263265
return activities
266+
267+
268+
def time_to_datetime(
    ts: Union[str, float], offset: Optional[datetime] = None
) -> datetime:
    """
    Convert a timestamp or a relative duration into a UTC-aware datetime.

    `ts` may be:

    * a number (int or float): read as a POSIX timestamp, in seconds
    * the string `"now"`: the current time
    * a string such as `"3 minutes"`: that duration is subtracted from
      `offset`, which defaults to the current time. Supported units are
      `second(s)`, `minute(s)`, `hour(s)` and `day(s)`.

    String values are matched case-insensitively and surrounding or repeated
    whitespace is ignored.

    Raises `ValueError` when a string is neither `"now"` nor of the
    `"<quantity> <unit>"` form, or when its unit is not supported.
    """
    if isinstance(ts, (int, float)):
        return datetime.fromtimestamp(ts, tz=timezone.utc)

    text = ts.strip().lower()
    if text == "now":
        return datetime.now(tz=timezone.utc)

    try:
        quantity, unit = text.split(None, 1)
        duration = float(quantity)
    except ValueError:
        raise ValueError(
            f"invalid time expression '{ts}': expected 'now' or "
            "'<quantity> <unit>' such as '3 minutes'"
        ) from None

    seconds_per_unit = {
        "second": 1,
        "seconds": 1,
        "minute": 60,
        "minutes": 60,
        "hour": 60 * 60,
        "hours": 60 * 60,
        "day": 60 * 60 * 24,
        "days": 60 * 60 * 24,
    }
    factor = seconds_per_unit.get(unit)
    if factor is None:
        # fail with a clear message rather than an unbound local variable
        raise ValueError(
            f"unsupported time unit '{unit}' in '{ts}': expected one of "
            "second(s), minute(s), hour(s) or day(s)"
        )

    base = datetime.now(tz=timezone.utc) if offset is None else offset
    return base - timedelta(seconds=duration * factor)

chaosaws/incidents/__init__.py

Whitespace-only changes.

chaosaws/incidents/probes.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
from typing import Any, Dict, Optional, Union
2+
3+
from chaoslib.exceptions import ActivityFailed
4+
from chaoslib.types import Configuration, Secrets
5+
from logzero import logger
6+
7+
from chaosaws import aws_client, time_to_datetime
8+
9+
__all__ = [
10+
"get_incidents",
11+
"get_active_incidents",
12+
"get_resolved_incidents",
13+
"has_incident_been_opened",
14+
"has_incident_been_resolved",
15+
"get_active_incident_items",
16+
"get_resolved_incident_items",
17+
]
18+
19+
20+
def get_incidents(
    impact: int = 1,
    status: str = "OPEN",
    created_in_the_last: Union[str, float] = "3 minutes",
    created_by: Optional[str] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
) -> Dict[str, Any]:
    """
    List the incident records matching an `impact` and a `status` that were
    created after the start of the given window.

    The start of the window is computed by `chaosaws.time_to_datetime` from
    `created_in_the_last`: a string such as `3 minutes` means "now minus
    3 minutes", while a float is read by that helper as a POSIX timestamp.

    Set `created_by` to the arn of a resource/role to only keep the incidents
    it created.

    At most 10 records are requested. The raw response of the
    `list_incident_records` call is returned. Any error raised while calling
    the API is turned into an `ActivityFailed` exception.

    See also:
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm-incidents.html
    """  # noqa: E501
    window_end = time_to_datetime("now")
    window_start = time_to_datetime(created_in_the_last, offset=window_end)
    start, end = window_start, window_end

    def _equals(key: str, kind: str, value: Any) -> Dict[str, Any]:
        # build one "equals" filter entry as expected by the API
        return {"condition": {"equals": {kind: [value]}}, "key": key}

    filters = [
        _equals("impact", "integerValues", impact),
        {"condition": {"after": window_start}, "key": "creationTime"},
        _equals("status", "stringValues", status),
    ]
    if created_by:
        filters.append(_equals("createdBy", "stringValues", created_by))

    client = aws_client("ssm-incidents", configuration, secrets)
    try:
        logger.debug(
            f"Requesting incidents between {start} and {end} with impact "
            f"{impact} and status {status} and created by {created_by or 'n/a'}"
        )
        response = client.list_incident_records(filters=filters, maxResults=10)
        logger.debug(f"Found {len(response['incidentRecordSummaries'])} incidents")
    except Exception as e:
        # catchall as boto3 exception management is so poorly documented
        logger.debug("Failed to call AWS SSM Incidents API", exc_info=True)
        raise ActivityFailed(f"SSM Incidents API failed: {str(e)}")

    return response
75+
76+
77+
def get_active_incidents(
    impact: int = 1,
    created_in_the_last: Union[str, float] = "3 minutes",
    created_by: Optional[str] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
) -> Dict[str, Any]:
    """
    List the incidents with the `OPEN` status matching the given `impact` and
    created within the given window.

    This is `get_incidents` with its `status` fixed to `OPEN`: the window
    (`created_in_the_last`) and the optional `created_by` arn are passed to
    it unchanged, and its response is returned as-is.

    See also:
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm-incidents.html
    """  # noqa: E501
    return get_incidents(
        impact=impact,
        status="OPEN",
        created_in_the_last=created_in_the_last,
        created_by=created_by,
        configuration=configuration,
        secrets=secrets,
    )
105+
106+
107+
def get_resolved_incidents(
    impact: int = 1,
    created_in_the_last: Union[str, float] = "3 minutes",
    created_by: Optional[str] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
) -> Dict[str, Any]:
    """
    List the incidents with the `RESOLVED` status matching the given `impact`
    and created within the given window.

    This is `get_incidents` with its `status` fixed to `RESOLVED`: the window
    (`created_in_the_last`) and the optional `created_by` arn are passed to
    it unchanged, and its response is returned as-is.

    See also:
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm-incidents.html
    """  # noqa: E501
    return get_incidents(
        impact=impact,
        status="RESOLVED",
        created_in_the_last=created_in_the_last,
        created_by=created_by,
        configuration=configuration,
        secrets=secrets,
    )
135+
136+
137+
def has_incident_been_opened(
    impact: int = 1,
    created_in_the_last: Union[str, float] = "3 minutes",
    created_by: Optional[str] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
) -> bool:
    """
    Tell whether at least one incident with the `OPEN` status, the given
    `impact` and created within the given window is returned by
    `get_incidents`.
    """
    summaries = get_incidents(
        impact=impact,
        status="OPEN",
        created_in_the_last=created_in_the_last,
        created_by=created_by,
        configuration=configuration,
        secrets=secrets,
    )["incidentRecordSummaries"]

    return len(summaries) >= 1
154+
155+
156+
def has_incident_been_resolved(
    impact: int = 1,
    created_in_the_last: Union[str, float] = "3 minutes",
    created_by: Optional[str] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
) -> bool:
    """
    Tell whether at least one incident with the `RESOLVED` status, the given
    `impact` and created within the given window is returned by
    `get_incidents`.
    """
    summaries = get_incidents(
        impact=impact,
        status="RESOLVED",
        created_in_the_last=created_in_the_last,
        created_by=created_by,
        configuration=configuration,
        secrets=secrets,
    )["incidentRecordSummaries"]

    return len(summaries) >= 1
173+
174+
175+
def get_active_incident_items(
    impact: int = 1,
    created_in_the_last: Union[str, float] = "3 minutes",
    created_by: Optional[str] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
) -> Dict[str, Any]:
    """
    Retrieve the items related to the first active incident matching the
    criteria.

    The incident is the first record returned by `get_active_incidents` for
    the given `impact`, window (`created_in_the_last`) and optional
    `created_by` arn. At most 10 related items are requested and the raw
    response of the `list_related_items` call is returned.

    Raises `ActivityFailed` when no active incident matches the criteria or
    when the API call fails.

    See also:
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm-incidents.html
    """  # noqa: E501
    incidents = get_active_incidents(
        impact,
        created_in_the_last,
        created_by,
        configuration=configuration,
        secrets=secrets,
    )

    summaries = incidents["incidentRecordSummaries"]
    if not summaries:
        # report a proper activity failure rather than a bare IndexError
        raise ActivityFailed("No active incident found matching the given criteria")

    arn = summaries[0]["arn"]

    client = aws_client("ssm-incidents", configuration, secrets)
    try:
        logger.debug(f"Looking up items for incident {arn}")
        # related items are served by `list_related_items`;
        # `list_incident_records` does not take an incident record arn
        response = client.list_related_items(
            incidentRecordArn=arn,
            maxResults=10,
        )
        logger.debug(f"Found {len(response['relatedItems'])} items")
    except Exception as e:
        # catchall as boto3 exception management is so poorly documented
        logger.debug("Failed to call AWS SSM Incidents API", exc_info=True)
        raise ActivityFailed(f"SSM Incidents API failed: {str(e)}") from e

    return response
209+
210+
211+
def get_resolved_incident_items(
    impact: int = 1,
    created_in_the_last: Union[str, float] = "3 minutes",
    created_by: Optional[str] = None,
    configuration: Configuration = None,
    secrets: Secrets = None,
) -> Dict[str, Any]:
    """
    Retrieve the items related to the first resolved incident matching the
    criteria.

    The incident is the first record returned by `get_resolved_incidents` for
    the given `impact`, window (`created_in_the_last`) and optional
    `created_by` arn. At most 10 related items are requested and the raw
    response of the `list_related_items` call is returned.

    Raises `ActivityFailed` when no resolved incident matches the criteria or
    when the API call fails.

    See also:
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm-incidents.html
    """  # noqa: E501
    incidents = get_resolved_incidents(
        impact,
        created_in_the_last,
        created_by,
        configuration=configuration,
        secrets=secrets,
    )

    summaries = incidents["incidentRecordSummaries"]
    if not summaries:
        # report a proper activity failure rather than a bare IndexError
        raise ActivityFailed(
            "No resolved incident found matching the given criteria"
        )

    arn = summaries[0]["arn"]

    client = aws_client("ssm-incidents", configuration, secrets)
    try:
        logger.debug(f"Looking up items for incident {arn}")
        response = client.list_related_items(
            incidentRecordArn=arn,
            maxResults=10,
        )
        logger.debug(f"Found {len(response['relatedItems'])} items")
    except Exception as e:
        # catchall as boto3 exception management is so poorly documented
        logger.debug("Failed to call AWS SSM Incidents API", exc_info=True)
        raise ActivityFailed(f"SSM Incidents API failed: {str(e)}") from e

    return response

chaosaws/xray/probes.py

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
import json
2-
from datetime import datetime, timedelta, timezone
32
from typing import Any, Dict, List, Optional, Union
43

54
from chaoslib.exceptions import ActivityFailed
65
from chaoslib.types import Configuration, Secrets
76
from logzero import logger
87

9-
from chaosaws import aws_client
8+
from chaosaws import aws_client, time_to_datetime
109

1110
__all__ = [
1211
"get_traces",
@@ -195,31 +194,3 @@ def get_service_graph(
195194
raise ActivityFailed(f"XRay service graph failed: {str(e)}")
196195

197196
return response
198-
199-
200-
###############################################################################
201-
# Private functions
202-
###############################################################################
203-
def time_to_datetime(
204-
ts: Union[str, float], offset: Optional[datetime] = None
205-
) -> datetime:
206-
if isinstance(ts, float):
207-
return datetime.fromtimestamp(ts, tz=timezone.utc)
208-
209-
if ts == "now":
210-
return datetime.utcnow().replace(tzinfo=timezone.utc)
211-
212-
offset = offset or datetime.utcnow().replace(tzinfo=timezone.utc)
213-
quantity, unit = ts.split(" ", 1)
214-
duration = float(quantity)
215-
216-
if unit in ("second", "seconds"):
217-
delta = 1
218-
elif unit in ("minute", "minutes"):
219-
delta = 60
220-
elif unit in ("hour", "hours"):
221-
delta = 60 * 60
222-
elif unit in ("day", "days"):
223-
delta = 60 * 60 * 24
224-
225-
return offset - timedelta(seconds=duration * delta)

0 commit comments

Comments
 (0)