Skip to content

Commit 9590718

Browse files
authored
Merge pull request #248 from fact-project/shifthelper_heartbeat
Heartbeats
2 parents 019da8d + dcd2c17 commit 9590718

File tree

7 files changed

+103
-42
lines changed

7 files changed

+103
-42
lines changed

setup.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='shifthelper',
5-
version='1.1.4',
5+
version='1.2.0',
66
description='a tool for helping people with a FACT night shift',
77
url='https://github.com/fact-project/shifthelper',
88
author='Dominik Neise, Maximilian Noethe, Sebastian Mueller',

shifthelper/__main__.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from .tools.shift import get_current_shifter
1010
from .tools import config
1111
from .logging import config_logging
12-
from .checks import FactIntervalCheck, FlareAlertCheck
12+
from .checks import FactIntervalCheck
1313
from . import conditions
1414
from .categories import CATEGORY_SHIFTER, CATEGORY_DEVELOPER
1515

@@ -54,7 +54,14 @@ def telegram_book(category):
5454
def main():
5555
with Custos(
5656
checks=[
57-
FlareAlertCheck(category=CATEGORY_SHIFTER, interval=300),
57+
FactIntervalCheck(
58+
name='HeartBeat',
59+
interval=120,
60+
checklist=[
61+
conditions.update_heartbeat,
62+
],
63+
category=CATEGORY_DEVELOPER
64+
),
5865
FactIntervalCheck(
5966
name='DummyAlert',
6067
interval=60,

shifthelper/conditions.py

+19
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from .tools import get_last_parking_checklist_entry
1818
from .tools import fetch_users_awake
1919
from .tools import fetch_dummy_alerts
20+
from . import tools
2021
from . import retry_smart_fact_crawler as sfc
2122
from .debug_log_wrapper import log_call_and_result
2223

@@ -277,6 +278,24 @@ def is_nobody_awake():
277278
return get_current_shifter().username not in awake
278279

279280

281+
@log_call_and_result
282+
def update_heartbeat():
283+
'''HeartbeatMonitor not ok'''
284+
log = logging.getLogger(__name__)
285+
heartbeats = tools.update_heartbeat()
286+
if "heartbeatMonitor" not in heartbeats:
287+
log.debug("HeartbeatMonitor offline?")
288+
return True
289+
else:
290+
heartbeat_monitor_age = (
291+
datetime.utcnow() -
292+
pd.to_datetime(heartbeats['heartbeatMonitor'])
293+
)
294+
if heartbeat_monitor_age > timedelta(minutes=10):
295+
log.debug('heartbeat_monitor_age > timedelta(minutes=10)')
296+
return True
297+
return False
298+
280299
@log_call_and_result
281300
def is_dummy_alert_by_shifter():
282301
'''Dummy Alert'''

shifthelper/notifiers.py

+39-30
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,7 @@
11
import datetime
2-
import requests
32

43
from custos import TwilioNotifier
54
from .tools.shift import get_current_shifter
6-
from copy import copy
75
from .tools import config, get_alerts
86
from .categories import CATEGORY_DEVELOPER
97

@@ -37,32 +35,7 @@ def notify(self, recipient, msg):
3735
)
3836
self.not_acknowledged_messages.append(msg)
3937

40-
def _remove_acknowledged_and_old_calls(self):
41-
""" from the list of not acknowledged calls
42-
remove all calls, which have been acknowledged on the web page
43-
44-
Also remove calls older than 2 hours, to get out of
45-
a "call the backup shifter" dead lock
46-
"""
47-
try:
48-
alerts = {a['uuid']: a for a in get_alerts()}
49-
except requests.exceptions.RequestException:
50-
return
51-
52-
for msg in copy(self.not_acknowledged_messages):
53-
age = datetime.datetime.utcnow() - msg.timestamp
54-
if age > (self.max_time_for_fallback + self.time_before_fallback):
55-
self.not_acknowledged_messages.remove(msg)
56-
else:
57-
try:
58-
alert = alerts[str(msg.uuid)]
59-
except KeyError:
60-
continue
61-
62-
if alert['acknowledged'] is True:
63-
self.not_acknowledged_messages.remove(msg)
64-
65-
def _get_oldest_call_age(self):
38+
def _get_oldest_message_age(self):
6639
max_age = datetime.timedelta()
6740
for msg in self.not_acknowledged_messages:
6841
age = datetime.datetime.utcnow() - msg.timestamp
@@ -97,14 +70,16 @@ def get_numbers_to_call(self, msg):
9770
log.debug('Getting phone number of primary shifter')
9871
numbers_to_call.append(self.phone_number_of_normal_shifter())
9972

100-
if self._get_oldest_call_age() >= self.time_before_fallback:
73+
if self._get_oldest_message_age() >= self.time_before_fallback:
10174
log.debug('Getting phone number of fallback shifter')
10275
numbers_to_call.append(self.phone_number_of_fallback_shifter())
10376

10477
return numbers_to_call
10578

10679
def handle_message(self, msg):
107-
self._remove_acknowledged_and_old_calls()
80+
self._remove_old_messages()
81+
self._remove_acknowledged_messages()
82+
10883
log.debug('Got a message')
10984
if msg.level >= self.level:
11085
log.debug('Message is over alert level')
@@ -113,3 +88,37 @@ def handle_message(self, msg):
11388
for phone_number in numbers_to_call:
11489
log.info('Calling {}'.format(phone_number))
11590
self.notify(phone_number, msg)
91+
92+
def _remove_old_messages(self):
93+
""" from the list of not_acknowledged_messages
94+
remove messages older than a certain limit, to avoid calling the
95+
fallback forever.
96+
"""
97+
limit = self.max_time_for_fallback + self.time_before_fallback
98+
self.not_acknowledged_messages = [
99+
msg for msg in self.not_acknowledged_messages
100+
if not is_message_old(msg, limit)
101+
]
102+
103+
def _remove_acknowledged_messages(self):
104+
''' from the list of not_acknowledged_messages
105+
remove all messages, which have been acknowledged on the web page
106+
'''
107+
alerts = {a['uuid']: a for a in get_alerts()}
108+
109+
self.not_acknowledged_messages = [
110+
msg for msg in self.not_acknowledged_messages
111+
if not is_message_acknowledged(alerts, msg)
112+
]
113+
114+
115+
def is_message_acknowledged(alerts, msg):
116+
try:
117+
return alerts[str(msg.uuid)]['acknowledged']
118+
except KeyError:
119+
return False
120+
121+
122+
def is_message_old(msg, limit):
123+
age = datetime.datetime.utcnow() - msg.timestamp
124+
return age > limit

shifthelper/tools/__init__.py

+33-6
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,19 @@
2626
db_engines = {}
2727

2828

29-
@retry(stop_max_delay=30000, # 30 seconds max
30-
wait_exponential_multiplier=100, # wait 2^i * 100 ms, on the i-th retry
31-
wait_exponential_max=1000, # but wait 1 second per try maximum
32-
)
3329
def get_alerts():
34-
alerts = requests.get(config['webservice']['post-url'])
35-
return alerts.json()
30+
@retry(stop_max_delay=30000, # 30 seconds max
31+
wait_exponential_multiplier=100, # wait 2^i * 100 ms, on the i-th retry
32+
wait_exponential_max=1000, # but wait 1 second per try maximum
33+
wrap_exception=True
34+
)
35+
def retry_fetch_fail_after_30sec():
36+
alerts = requests.get(config['webservice']['post-url'])
37+
return alerts.json()
38+
try:
39+
return retry_fetch_fail_after_30sec()
40+
except RetryError:
41+
return {}
3642

3743

3844
def create_db_connection(db_config=None):
@@ -103,6 +109,27 @@ def retry_fetch_fail_after_30sec():
103109
return {}
104110

105111

112+
def update_heartbeat():
113+
@retry(
114+
stop_max_delay=30000, # 30 seconds max
115+
wait_exponential_multiplier=100, # wait 2^i * 100 ms, on the i-th retry
116+
wait_exponential_max=1000, # but wait 1 second per try maximum
117+
wrap_exception=True
118+
)
119+
def retry_fetch_fail_after_30sec():
120+
return requests.post(
121+
config['webservice']['shifthelperHeartbeat'],
122+
auth=(
123+
config['webservice']['user'],
124+
config['webservice']['password']
125+
)
126+
).json()
127+
try:
128+
return retry_fetch_fail_after_30sec()
129+
except RetryError as e:
130+
return {}
131+
132+
106133
class NightlyResettingDefaultdict(defaultdict):
107134
def __init__(self, *args, **kwargs):
108135
self.night = night_integer(datetime.utcnow())

tests/test_acknowledging.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def test_all_acknowledged():
3232
)
3333

3434

35-
def test_message_level_all_acknowledged():
35+
def test_message_level_all_acknowledged_1():
3636
from shifthelper.checks import message_level
3737

3838
alerts = pd.read_json('tests/resources/not_all_acknowledged.json')
@@ -44,7 +44,7 @@ def test_message_level_all_acknowledged():
4444
assert message_level(checkname='MainJSStatusCheck', alerts=alerts) == levels.WARNING
4545

4646

47-
def test_message_level_not_all_acknowledged():
47+
def test_message_level_not_all_acknowledged_2():
4848
from shifthelper.checks import message_level
4949

5050
alerts = pd.read_json('tests/resources/not_all_acknowledged.json')

tests/test_conditions.py

-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
from freezegun import freeze_time
2-
import datetime
32
import smart_fact_crawler
43

54

0 commit comments

Comments
 (0)