diff --git a/.gitignore b/.gitignore
index 830cd8a..3863c8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,8 @@
.idea/
__pycache__/
-logs/*.log
-logs/*/*.log
+logs/*.log*
+logs/*/*.log*
!logs/alerts/.gitkeep
!logs/general/.gitkeep
diff --git a/Pipfile.lock b/Pipfile.lock
index bf1aae3..058f836 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -16,47 +16,48 @@
"default": {
"certifi": {
"hashes": [
- "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50",
- "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef"
+ "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3",
+ "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f"
],
- "version": "==2019.9.11"
+ "version": "==2019.11.28"
},
"cffi": {
"hashes": [
- "sha256:00d890313797d9fe4420506613384b43099ad7d2b905c0752dbcc3a6f14d80fa",
- "sha256:0cf9e550ac6c5e57b713437e2f4ac2d7fd0cd10336525a27224f5fc1ec2ee59a",
- "sha256:0ea23c9c0cdd6778146a50d867d6405693ac3b80a68829966c98dd5e1bbae400",
- "sha256:193697c2918ecdb3865acf6557cddf5076bb39f1f654975e087b67efdff83365",
- "sha256:1ae14b542bf3b35e5229439c35653d2ef7d8316c1fffb980f9b7647e544baa98",
- "sha256:1e389e069450609c6ffa37f21f40cce36f9be7643bbe5051ab1de99d5a779526",
- "sha256:263242b6ace7f9cd4ea401428d2d45066b49a700852334fd55311bde36dcda14",
- "sha256:33142ae9807665fa6511cfa9857132b2c3ee6ddffb012b3f0933fc11e1e830d5",
- "sha256:364f8404034ae1b232335d8c7f7b57deac566f148f7222cef78cf8ae28ef764e",
- "sha256:47368f69fe6529f8f49a5d146ddee713fc9057e31d61e8b6dc86a6a5e38cecc1",
- "sha256:4895640844f17bec32943995dc8c96989226974dfeb9dd121cc45d36e0d0c434",
- "sha256:558b3afef987cf4b17abd849e7bedf64ee12b28175d564d05b628a0f9355599b",
- "sha256:5ba86e1d80d458b338bda676fd9f9d68cb4e7a03819632969cf6d46b01a26730",
- "sha256:63424daa6955e6b4c70dc2755897f5be1d719eabe71b2625948b222775ed5c43",
- "sha256:6381a7d8b1ebd0bc27c3bc85bc1bfadbb6e6f756b4d4db0aa1425c3719ba26b4",
- "sha256:6381ab708158c4e1639da1f2a7679a9bbe3e5a776fc6d1fd808076f0e3145331",
- "sha256:6fd58366747debfa5e6163ada468a90788411f10c92597d3b0a912d07e580c36",
- "sha256:728ec653964655d65408949b07f9b2219df78badd601d6c49e28d604efe40599",
- "sha256:7cfcfda59ef1f95b9f729c56fe8a4041899f96b72685d36ef16a3440a0f85da8",
- "sha256:819f8d5197c2684524637f940445c06e003c4a541f9983fd30d6deaa2a5487d8",
- "sha256:825ecffd9574557590e3225560a8a9d751f6ffe4a49e3c40918c9969b93395fa",
- "sha256:8a2bcae2258d00fcfc96a9bde4a6177bc4274fe033f79311c5dd3d3148c26518",
- "sha256:9009e917d8f5ef780c2626e29b6bc126f4cb2a4d43ca67aa2b40f2a5d6385e78",
- "sha256:9c77564a51d4d914ed5af096cd9843d90c45b784b511723bd46a8a9d09cf16fc",
- "sha256:a19089fa74ed19c4fe96502a291cfdb89223a9705b1d73b3005df4256976142e",
- "sha256:a40ed527bffa2b7ebe07acc5a3f782da072e262ca994b4f2085100b5a444bbb2",
- "sha256:b8f09f21544b9899defb09afbdaeb200e6a87a2b8e604892940044cf94444644",
- "sha256:bb75ba21d5716abc41af16eac1145ab2e471deedde1f22c6f99bd9f995504df0",
- "sha256:e22a00c0c81ffcecaf07c2bfb3672fa372c50e2bd1024ffee0da191c1b27fc71",
- "sha256:e55b5a746fb77f10c83e8af081979351722f6ea48facea79d470b3731c7b2891",
- "sha256:ec2fa3ee81707a5232bf2dfbd6623fdb278e070d596effc7e2d788f2ada71a05",
- "sha256:fd82eb4694be712fcae03c717ca2e0fc720657ac226b80bbb597e971fc6928c2"
- ],
- "version": "==1.13.1"
+ "sha256:0b49274afc941c626b605fb59b59c3485c17dc776dc3cc7cc14aca74cc19cc42",
+ "sha256:0e3ea92942cb1168e38c05c1d56b0527ce31f1a370f6117f1d490b8dcd6b3a04",
+ "sha256:135f69aecbf4517d5b3d6429207b2dff49c876be724ac0c8bf8e1ea99df3d7e5",
+ "sha256:19db0cdd6e516f13329cba4903368bff9bb5a9331d3410b1b448daaadc495e54",
+ "sha256:2781e9ad0e9d47173c0093321bb5435a9dfae0ed6a762aabafa13108f5f7b2ba",
+ "sha256:291f7c42e21d72144bb1c1b2e825ec60f46d0a7468f5346841860454c7aa8f57",
+ "sha256:2c5e309ec482556397cb21ede0350c5e82f0eb2621de04b2633588d118da4396",
+ "sha256:2e9c80a8c3344a92cb04661115898a9129c074f7ab82011ef4b612f645939f12",
+ "sha256:32a262e2b90ffcfdd97c7a5e24a6012a43c61f1f5a57789ad80af1d26c6acd97",
+ "sha256:3c9fff570f13480b201e9ab69453108f6d98244a7f495e91b6c654a47486ba43",
+ "sha256:415bdc7ca8c1c634a6d7163d43fb0ea885a07e9618a64bda407e04b04333b7db",
+ "sha256:42194f54c11abc8583417a7cf4eaff544ce0de8187abaf5d29029c91b1725ad3",
+ "sha256:4424e42199e86b21fc4db83bd76909a6fc2a2aefb352cb5414833c030f6ed71b",
+ "sha256:4a43c91840bda5f55249413037b7a9b79c90b1184ed504883b72c4df70778579",
+ "sha256:599a1e8ff057ac530c9ad1778293c665cb81a791421f46922d80a86473c13346",
+ "sha256:5c4fae4e9cdd18c82ba3a134be256e98dc0596af1e7285a3d2602c97dcfa5159",
+ "sha256:5ecfa867dea6fabe2a58f03ac9186ea64da1386af2159196da51c4904e11d652",
+ "sha256:62f2578358d3a92e4ab2d830cd1c2049c9c0d0e6d3c58322993cc341bdeac22e",
+ "sha256:6471a82d5abea994e38d2c2abc77164b4f7fbaaf80261cb98394d5793f11b12a",
+ "sha256:6d4f18483d040e18546108eb13b1dfa1000a089bcf8529e30346116ea6240506",
+ "sha256:71a608532ab3bd26223c8d841dde43f3516aa5d2bf37b50ac410bb5e99053e8f",
+ "sha256:74a1d8c85fb6ff0b30fbfa8ad0ac23cd601a138f7509dc617ebc65ef305bb98d",
+ "sha256:7b93a885bb13073afb0aa73ad82059a4c41f4b7d8eb8368980448b52d4c7dc2c",
+ "sha256:7d4751da932caaec419d514eaa4215eaf14b612cff66398dd51129ac22680b20",
+ "sha256:7f627141a26b551bdebbc4855c1157feeef18241b4b8366ed22a5c7d672ef858",
+ "sha256:8169cf44dd8f9071b2b9248c35fc35e8677451c52f795daa2bb4643f32a540bc",
+ "sha256:aa00d66c0fab27373ae44ae26a66a9e43ff2a678bf63a9c7c1a9a4d61172827a",
+ "sha256:ccb032fda0873254380aa2bfad2582aedc2959186cce61e3a17abc1a55ff89c3",
+ "sha256:d754f39e0d1603b5b24a7f8484b22d2904fa551fe865fd0d4c3332f078d20d4e",
+ "sha256:d75c461e20e29afc0aee7172a0950157c704ff0dd51613506bd7d82b718e7410",
+ "sha256:dcd65317dd15bc0451f3e01c80da2216a31916bdcffd6221ca1202d96584aa25",
+ "sha256:e570d3ab32e2c2861c4ebe6ffcad6a8abf9347432a37608fe1fbd157b3f0036b",
+ "sha256:fd43a88e045cf992ed09fa724b5315b790525f2676883a6ea64e3263bae6549d"
+ ],
+ "version": "==1.13.2"
},
"chardet": {
"hashes": [
@@ -101,9 +102,9 @@
},
"future": {
"hashes": [
- "sha256:858e38522e8fd0d3ce8f0c1feaf0603358e366d5403209674c7b617fa0c24093"
+ "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"
],
- "version": "==0.18.1"
+ "version": "==0.18.2"
},
"idna": {
"hashes": [
@@ -127,11 +128,11 @@
},
"python-dateutil": {
"hashes": [
- "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
- "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e"
+ "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
+ "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
],
"index": "pypi",
- "version": "==2.8.0"
+ "version": "==2.8.1"
},
"python-telegram-bot": {
"hashes": [
@@ -166,10 +167,10 @@
},
"six": {
"hashes": [
- "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
- "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
+ "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd",
+ "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66"
],
- "version": "==1.12.0"
+ "version": "==1.13.0"
},
"tornado": {
"hashes": [
@@ -185,17 +186,17 @@
},
"twilio": {
"hashes": [
- "sha256:e78a2006b9449fb9fad5050537e0998c181c7d3a62eaa9eed434e59dbaf58324"
+ "sha256:da282a9c02bd9dfb190b798528b478833d8d28cb51464e8c45da0f0794384cde"
],
"index": "pypi",
- "version": "==6.32.0"
+ "version": "==6.34.0"
},
"urllib3": {
"hashes": [
- "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398",
- "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86"
+ "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293",
+ "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745"
],
- "version": "==1.25.6"
+ "version": "==1.25.7"
}
},
"develop": {
diff --git a/config/example_user_config_main.ini b/config/example_user_config_main.ini
index afc0675..c577df4 100644
--- a/config/example_user_config_main.ini
+++ b/config/example_user_config_main.ini
@@ -32,3 +32,8 @@ host = localhost
port = 6379
password = HMASDNoiSADnuiasdgnAIO876hg967bv99vb8buyT8BVuyT76VBT76uyi
+[periodic_alive_reminder]
+enabled = True
+interval_seconds = 3600
+email_enabled = False
+telegram_enabled = True
diff --git a/config/example_user_config_nodes.ini b/config/example_user_config_nodes.ini
index e0b5f3f..acb82c7 100644
--- a/config/example_user_config_nodes.ini
+++ b/config/example_user_config_nodes.ini
@@ -24,4 +24,4 @@ node_name = Sentry 2
node_rpc_url = http://11.22.33.44:26657
node_is_validator = false
include_in_node_monitor = false
-include_in_network_monitor = false
\ No newline at end of file
+include_in_network_monitor = false
diff --git a/config/example_user_config_repos.ini b/config/example_user_config_repos.ini
index 25a664f..3a2ab27 100644
--- a/config/example_user_config_repos.ini
+++ b/config/example_user_config_repos.ini
@@ -6,4 +6,4 @@ include_in_github_monitor = true
[repo_2]
repo_name = Gaia
repo_page = cosmos/gaia/
-include_in_github_monitor = false
\ No newline at end of file
+include_in_github_monitor = false
diff --git a/config/internal_config.ini b/config/internal_config.ini
index 6a7fe67..383defe 100644
--- a/config/internal_config.ini
+++ b/config/internal_config.ini
@@ -18,13 +18,14 @@ twiml_instructions_url = https://twimlets.com/echo
[redis]
redis_database = 10
-redis_rest_database = 11
+redis_test_database = 11
redis_twilio_snooze_key = twilio_snooze
redis_github_releases_key_prefix = github_releases_
redis_node_monitor_alive_key_prefix = node_monitor_alive_
redis_network_monitor_alive_key_prefix = network_monitor_alive_
redis_network_monitor_last_height_key_prefix = network_monitor_last_height_checked_
+redis_periodic_alive_reminder_mute_key = alive_reminder_mute
redis_node_monitor_alive_key_timeout = 86400
redis_network_monitor_alive_key_timeout = 86400
@@ -34,7 +35,8 @@ redis_network_monitor_alive_key_timeout = 86400
[monitoring_periods]
node_monitor_period_seconds = 10
network_monitor_period_seconds = 10
-github_monitor_period_seconds = 300
+network_monitor_max_catch_up_blocks = 500
+github_monitor_period_seconds = 3600
# These define how often a monitor runs an iteration of its monitoring loop
[alert_intervals_and_limits]
@@ -42,7 +44,8 @@ downtime_alert_interval_seconds = 900
max_missed_blocks_interval_seconds = 120
max_missed_blocks_in_time_interval = 10
validator_peer_danger_boundary = 1
-full_node_peer_danger_boundary = 15
+validator_peer_safe_boundary = 5
+full_node_peer_danger_boundary = 10
missed_blocks_danger_boundary = 5
github_error_interval_seconds = 3600
# These limit the number of alerts of a specific type received using either
diff --git a/doc/CHANGELOG.md b/doc/CHANGELOG.md
index 7a2847a..628a6e3 100644
--- a/doc/CHANGELOG.md
+++ b/doc/CHANGELOG.md
@@ -1,9 +1,51 @@
# Change Log
+## 1.1.0
+
+Released on December 05, 2019.
+
+### Update Instructions
+
+To update an instance of P.A.N.I.C. to this version:
+```shell script
+git fetch # Fetch these changes
+git checkout v1.1.0 # Switch to this version
+
+pipenv update # Update dependencies
+pipenv run python run_util_update_to_v1.1.0.py
+```
+
+The `run_util_update_to_v1.1.0.py` script updates `user_config_main.ini` so that it becomes compatible with the configuration format expected by v1.1.0.
+
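+For reference, the script adds the following stanza to `user_config_main.ini`, with the reminder disabled and the remaining values left blank for the setup process (`run_setup.py`) to fill in:
+
+```ini
+[periodic_alive_reminder]
+enabled = False
+interval_seconds =
+email_enabled =
+telegram_enabled =
+```
+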
+P.A.N.I.C. can now be started up. If the alerter was running as a Linux service, the service should now be restarted:
+
+```shell script
+sudo systemctl restart panic_alerter
+```
+
+### Features
+* Add **authenticated SMTP**, so that the email channel can use public SMTP servers, such as smtp.gmail.com, by supplying a valid username and password.
+* Add **periodic alive reminder** as a way for the alerter to inform the user that it is still running. It is turned on through the setup process and can be muted/unmuted using commands from Telegram.
+* Add **validator peer safe boundary** (`validator_peer_safe_boundary`, default: 5) to limit peer change alerts up to a certain number of peers.
+* Add **max catch-up blocks** (`network_monitor_max_catch_up_blocks`, default: 500) to limit the number of historical blocks that the network monitor checks when it is not in sync, so that it focuses on the more important recent events.
+* Add **current network monitor block height** to Telegram status message.
+
+### Changes and Improvements
+* Email channel now supports multiple recipients.
+* Internal config
+ * Changed default GitHub monitor period to 3600 seconds (1h).
+ * Changed default `full_node_peer_danger_boundary` to 10 for fewer alerts.
+* Other:
+ * Updated Telegram bot to use new context-based callbacks.
+ * Numbered log files (e.g. `*.log.1`) are now ignored via `.gitignore`.
+
+### Bug Fixes
+* Fixed full node peer increase alert not being sent when the new number of peers is equal to the danger boundary.
+* Setup processes now clear the config file before adding new entries.
+
## 1.0.0
Released on August 23, 2019.
### Added
-
* First version of the P.A.N.I.C. alerter by Simply VC
\ No newline at end of file
diff --git a/doc/DESIGN_AND_FEATURES.md b/doc/DESIGN_AND_FEATURES.md
index 8600db5..d61dd08 100644
--- a/doc/DESIGN_AND_FEATURES.md
+++ b/doc/DESIGN_AND_FEATURES.md
@@ -6,6 +6,7 @@ This page will present the inner workings of the alerter as well as the features
- **Alerting Channels**: (console, logging, Telegram, email, Twilio)
- **Alert Types**: (major, minor, info, error)
- **Monitor Types**: (node, network, GitHub)
+- **Periodic Alive Reminder**
- **Telegram Commands**
- **Redis**
- **Complete List of Alerts**
@@ -23,7 +24,7 @@ P.A.N.I.C. currently supports five alerting channels. By default, only console a
- **Console**: alerts printed to standard output (`stdout`).
- **Logging**: alerts logged to an alerts log (`logs/alerts/alerts.log`).
- **Telegram**: alerts delivered to a Telegram chat via a Telegram bot.
-- **Email**: alerts sent as emails using a personal SMTP server.
+- **Email**: alerts sent as emails using an SMTP server, with optional authentication (see the sketch below).
- **Twilio**: alerts trigger a phone call to grab the node operator's attention.
Instructions on how to set up the alerting channels can be found in the [installation guide](./INSTALL_AND_RUN.md).
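+
+Authenticated SMTP amounts to optionally logging in before sending. A minimal sketch of the idea using Python's standard `smtplib` (`send_alert_email` here is illustrative; the alerter's actual `EmailSender` may differ in detail):
+
+```python
+import smtplib
+from email.message import EmailMessage
+
+
+def send_alert_email(smtp_host: str, sender: str, to: str, subject: str,
+                     body: str, user: str = '', password: str = '') -> None:
+    msg = EmailMessage()
+    msg['Subject'] = subject
+    msg['From'] = sender
+    msg['To'] = to
+    msg.set_content(body)
+
+    # smtplib accepts a 'host' or 'host:port' string, e.g. smtp.gmail.com:587
+    with smtplib.SMTP(smtp_host) as server:
+        # A blank user means an open SMTP server (as in previous versions);
+        # public servers such as smtp.gmail.com require TLS and a login.
+        if user != '':
+            server.starttls()
+            server.login(user, password)
+        server.send_message(msg)
+```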
@@ -68,16 +69,27 @@ The network monitor deals with a ***minimum* of one validator node and one (non-
An important note is that the full node(s) should be a reliable data source in terms of availability. So much so that if there are no full nodes accessible, this is considered to be equivalent to the validator losing blocks and thus a `MAJOR` alert is raised.
+If the alerter is not in sync with the validator with respect to block height, the maximum number of historical blocks checked is `MCUB`, which is configurable from the internal config (`network_monitor_max_catch_up_blocks`).
+
In each monitoring round, the network monitor:
1. Gets the node's abci info from `[RPC_URL]/abci_info`
1. Gets the latest block height *LastH*
-2. For each height *H* from the last block height checked until *LastH*:
+2. Sets *H* = *LastHChecked* + 1, where *LastHChecked* is the height of the last block checked by the network monitor
+3. If *LastH* - *LastHChecked* > `MCUB`:
+ 1. Sets *H* = *LastH* - `MCUB`
+ 2. Gets the block at height *H* from `[RPC_URL]/block?height=H`
+ 3. Checks whether our validator is in the list of participating validators
+ 4. Increments or resets (depending on the outcome) the missed blocks counter for our validator
+4. Otherwise, if *H* <= *LastH*:
1. Gets the block at height *H* from `[RPC_URL]/block?height=H`
2. Checks whether our validator is in the list of participating validators
3. Increments or resets (depending on the outcome) the missed blocks counter for our validator
-3. Saves its state and the nodes' state
-4. Sleeps until the next monitoring round
+5. Saves its state and the nodes' state
+6. Sleeps until the next monitoring round, but only if it is not syncing (it is considered to be syncing if *LastH* - *LastHChecked* > 2).
+
+Default value:
+- `MCUB = network_monitor_max_catch_up_blocks = 500`
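+
+As a rough illustration, the height-selection logic above boils down to the following sketch (simplified; `get_block` and `update_missed_blocks` are placeholders, not the monitor's actual helpers):
+
+```python
+def check_next_block(last_h: int, last_h_checked: int, mcub: int) -> int:
+    """Checks at most one block per round; returns the new LastHChecked."""
+    h = last_h_checked + 1
+    if last_h - last_h_checked > mcub:
+        # Too far behind: jump ahead so that at most MCUB historical
+        # blocks are ever checked.
+        h = last_h - mcub
+    elif h > last_h:
+        return last_h_checked  # nothing new to check this round
+    block = get_block(h)  # GET [RPC_URL]/block?height=h
+    update_missed_blocks(block)  # increments or resets the counter
+    return h
+```
+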
### GitHub Monitor
@@ -90,13 +102,22 @@ In each monitoring round, the GitHub monitor:
2. Saves its state
3. Sleeps until the next monitoring round
+## Periodic Alive Reminder
+
+The periodic alive reminder is a way for P.A.N.I.C. to inform the operator that it is still running. This is especially useful when no alerts have been raised for a long time, as it saves the operator from wondering whether P.A.N.I.C. is still running at all.
+
+The following are some important points about the periodic alive reminder:
+
+1. The interval at which reminders are sent can be specified by the operator using the setup process described [here](./INSTALL_AND_RUN.md).
+2. The periodic alive reminder can be muted and unmuted using Telegram as discussed below.
+
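+The mechanics are simple; condensed from `src/alerting/periodic/periodic.py` (added in this version):
+
+```python
+def periodic_alive_reminder(interval, channel_set, mute_key, redis):
+    while True:
+        sleep(interval.total_seconds())
+        # The /mute Telegram command sets mute_key in Redis with a TTL, so
+        # reminders stay silent until it expires or /unmute removes it.
+        if not redis.exists(mute_key):
+            channel_set.alert_info(AlerterAliveAlert())
+```
+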
## Telegram Commands
-Telegram bots in P.A.N.I.C. serve two purposes. As mentioned above, they are used to send alerts. However they can also accept commands that allow you to check the status of the alerter (and its running monitors), snooze or unsnooze calls, and conveniently get Cosmos explorer links to validator lists, blocks, and transactions.
+Telegram bots in P.A.N.I.C. serve two purposes. As mentioned above, they are used to send alerts. However, they can also accept commands that allow you to check the status of the alerter (and its running monitors), snooze or unsnooze calls, mute or unmute the periodic alive reminder, and conveniently get Cosmos explorer links to validator lists, blocks, and transactions.
-For example, the `/status` command returns the following, if Redis is running along with three node monitors and one network monitor, and with calls not snoozed:
+For example, the `/status` command returns the following, if Redis is running along with three node monitors and one network monitor, with calls not snoozed and the periodic alive reminder not muted:
@@ -171,15 +192,23 @@ Voting power change alerts are mostly info alerts; voting power increase is alwa
### Number of Peers
-Alerts for changes in the number of peers range from info to major. Any increase is positive and is thus an info alert. As for peer decrease alerts:
-- For validator nodes: any decrease to `N` peers inside a configurable danger boundary `D1` is a major alert (i.e. `N <= D1`). Otherwise, any other decrease is a minor alert.
-- For non-validator nodes: any decrease to `N` peers inside a configurable danger boundary `D2` is a minor alert (i.e. `N <= D2`). Otherwise, any other decreases raises no alert.
+Alerts for changes in the number of peers range from info to major.
+#### For Validator Nodes
+- Any decrease to `N` peers inside a configurable danger boundary `D1` is a major alert (i.e. `N <= D1`).
+- Any decrease to `N` peers inside a configurable safe boundary `S1` is a minor alert (i.e. `D1 < N <= S1`).
+- Any decrease to `N` peers outside a configurable safe boundary `S1` raises no alerts (i.e. `N > S1`).
+- Any increase to `N` peers inside a configurable safe/danger boundary `S1`/`D1` raises an info alert (i.e. `N <= S1/D1`).
+- Any increase to `N` peers outside a configurable safe boundary `S1` raises no alerts (i.e. `N > S1`).
+#### For Non-Validator Nodes
+- Any decrease to `N` peers inside a configurable danger boundary `D2` raises a minor alert (i.e. `N <= D2`). Otherwise, any other decrease raises no alert.
+- Any increase to `N` peers inside a configurable danger boundary `D2` raises an info alert (i.e. `N <= D2`). Otherwise, any other increase raises no alert.
Non-validator nodes typically have many more peers, not all of which are individually important. Thus, once `D2` is exceeded (`N > D2`), a special *'increased outside danger range'* info alert is issued and no further peer increase alerts are issued, to reduce alert spam.
Default values:
- `D1 = validator_peer_danger_boundary = 1`
-- `D2 = full_node_peer_danger_boundary = 15`
+- `D2 = full_node_peer_danger_boundary = 10`
+- `S1 = validator_peer_safe_boundary = 5`
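+
+For a validator, these rules can be summarised in a few lines (an illustrative sketch, not the alerter's actual code):
+
+```python
+def validator_peer_change_severity(old: int, new: int,
+                                   d1: int = 1, s1: int = 5) -> str:
+    """Returns the severity of the peer change alert to raise, if any."""
+    if new < old:  # decrease
+        if new <= d1:
+            return 'major'
+        return 'minor' if new <= s1 else 'none'
+    if new > old:  # increase
+        # The first increase past S1 raises a one-time info alert
+        # (PeersIncreasedOutsideSafeRangeAlert) instead of silence.
+        return 'info' if new <= s1 else 'none'
+    return 'none'
+```
+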
| Class | Severity | Configurable |
|---|---|---|
@@ -214,6 +243,17 @@ The only two alerts raised by the GitHub alerter are an info alert when a new re
| `NewGitHubReleaseAlert` | `INFO` | ✗ |
| `CannotAccessGitHubPageAlert` | `ERROR` | ✗ |
+### Periodic Alive Reminder
+
+If the periodic alive reminder is enabled from the config file, and P.A.N.I.C. is running smoothly, the operator is informed via an info alert, once every configured time period, that P.A.N.I.C. is still running.
+
+The periodic alive reminder always uses the console and logger to raise this alert; however, the operator can also receive it via Telegram, email, or both, by modifying the config file as described [here](./INSTALL_AND_RUN.md#setting-up-panic).
+
+| Class | Severity | Configurable |
+|---|---|---|
+| `AlerterAliveAlert` | `INFO` | ✓ |
+
+
### Other (Errors)
Last but not least is a set of error alerts, including read errors when gathering data from a node, termination of a component of the alerter (e.g. a monitor) due to some exception, and any problem experienced when using Telegram bots.
diff --git a/doc/IMG_TELEGRAM_COMMANDS.png b/doc/IMG_TELEGRAM_COMMANDS.png
index 80755b4..022793c 100644
Binary files a/doc/IMG_TELEGRAM_COMMANDS.png and b/doc/IMG_TELEGRAM_COMMANDS.png differ
diff --git a/doc/IMG_TELEGRAM_STATUS_COMMAND.png b/doc/IMG_TELEGRAM_STATUS_COMMAND.png
index f376759..795f1d6 100644
Binary files a/doc/IMG_TELEGRAM_STATUS_COMMAND.png and b/doc/IMG_TELEGRAM_STATUS_COMMAND.png differ
diff --git a/doc/INSTALL_AND_RUN.md b/doc/INSTALL_AND_RUN.md
index 08a82d0..bf7cf04 100644
--- a/doc/INSTALL_AND_RUN.md
+++ b/doc/INSTALL_AND_RUN.md
@@ -23,11 +23,12 @@ The only major requirement to run P.A.N.I.C. is Python 3. However, to unlock the
2. To install **pip** package manager:
- On Linux, run: `apt-get install python3-pip`
- On Windows, it should come included in the installation.
-3. To install **pipenv** packaging tool, run `pip install pipenv`.
+3. To install **pipenv** packaging tool, run `pip install pipenv`.
+ (If `pip` is not found, try using `pip3` instead.)
**At the end, you should be able to:**
1. Get the Python version by running `python --version`.
- (If multiple versions of Python are installed, the `python` executable may be `python3.6`, `python3.7`, etc.)
+ (You may have to replace `python` with `python3.6`, `python3.7`, etc.)
2. Get the pip version by running `pip --version`.
3. Get the pipenv version by running `pipenv --version`.
diff --git a/run_alerter.py b/run_alerter.py
index 991a0fc..3b8c3c8 100644
--- a/run_alerter.py
+++ b/run_alerter.py
@@ -3,7 +3,10 @@
from typing import List, Tuple
from src.alerting.alert_utils.get_channel_set import get_full_channel_set
+from src.alerting.alert_utils.get_channel_set import \
+ get_periodic_alive_reminder_channel_set
from src.alerting.alerts.alerts import TerminatedDueToExceptionAlert
+from src.alerting.periodic.periodic import periodic_alive_reminder
from src.commands.handlers.telegram import TelegramCommands
from src.monitoring.monitor_utils.get_json import get_cosmos_json, get_json
from src.monitoring.monitors.github import GitHubMonitor
@@ -91,19 +94,21 @@ def run_monitor_nodes(node: Node):
node.name, InternalConf.logging_level, rotating=True)
# Initialise monitor
- node_monitor = NodeMonitor(monitor_name, channel_set,
+ node_monitor = NodeMonitor(monitor_name, full_channel_set,
logger_monitor_node, REDIS, node)
- # Start
- log_and_print('{} started.'.format(monitor_name))
- sys.stdout.flush()
- try:
- start_node_monitor(node_monitor,
- InternalConf.node_monitor_period_seconds,
- logger_monitor_node)
- except Exception as e:
- channel_set.alert_error(TerminatedDueToExceptionAlert(monitor_name, e))
- log_and_print('{} stopped.'.format(monitor_name))
+    while True:
+        # Start
+        log_and_print('{} started.'.format(monitor_name))
+        sys.stdout.flush()
+        try:
+            start_node_monitor(node_monitor,
+                               InternalConf.node_monitor_period_seconds,
+                               logger_monitor_node)
+        except Exception as e:
+            full_channel_set.alert_error(
+                TerminatedDueToExceptionAlert(monitor_name, e))
+        log_and_print('{} stopped.'.format(monitor_name))
def run_monitor_network(network_nodes_tuple: Tuple[str, List[Node]]):
@@ -132,25 +137,29 @@ def run_monitor_network(network_nodes_tuple: Tuple[str, List[Node]]):
return
# Initialise monitor
- network_monitor = NetworkMonitor(monitor_name, channel_set,
- logger_monitor_network, REDIS,
- full_nodes, validators)
+ network_monitor = NetworkMonitor(monitor_name, full_channel_set,
+ logger_monitor_network,
+ InternalConf.
+ network_monitor_max_catch_up_blocks,
+ REDIS, full_nodes, validators)
except Exception as e:
msg = '!!! Error when initialising {}: {} !!!'.format(monitor_name, e)
log_and_print(msg)
raise InitialisationException(msg)
- # Start
- log_and_print('{} started with {} validator(s) and {} full node(s).'
- ''.format(monitor_name, len(validators), len(full_nodes)))
- sys.stdout.flush()
- try:
- start_network_monitor(network_monitor,
- InternalConf.network_monitor_period_seconds,
- logger_monitor_network)
- except Exception as e:
- channel_set.alert_error(TerminatedDueToExceptionAlert(monitor_name, e))
- log_and_print('{} stopped.'.format(monitor_name))
+    while True:
+        # Start
+        log_and_print('{} started with {} validator(s) and {} full node(s).'
+                      ''.format(monitor_name, len(validators), len(full_nodes)))
+        sys.stdout.flush()
+        try:
+            start_network_monitor(network_monitor,
+                                  InternalConf.network_monitor_period_seconds,
+                                  logger_monitor_network)
+        except Exception as e:
+            full_channel_set.alert_error(
+                TerminatedDueToExceptionAlert(monitor_name, e))
+        log_and_print('{} stopped.'.format(monitor_name))
def run_commands_telegram():
@@ -161,21 +170,24 @@ def run_commands_telegram():
if not UserConf.telegram_cmds_enabled:
return
- # Start
- log_and_print('{} started.'.format(monitor_name))
- sys.stdout.flush()
- try:
- TelegramCommands(
- UserConf.telegram_cmds_bot_token,
- UserConf.telegram_cmds_bot_chat_id,
- logger_commands_telegram, REDIS,
- InternalConf.redis_twilio_snooze_key,
- InternalConf.redis_node_monitor_alive_key_prefix,
- InternalConf.redis_network_monitor_alive_key_prefix
- ).start_listening()
- except Exception as e:
- channel_set.alert_error(TerminatedDueToExceptionAlert(monitor_name, e))
- log_and_print('{} stopped.'.format(monitor_name))
+    while True:
+        # Start
+        log_and_print('{} started.'.format(monitor_name))
+        sys.stdout.flush()
+        try:
+            TelegramCommands(
+                UserConf.telegram_cmds_bot_token,
+                UserConf.telegram_cmds_bot_chat_id,
+                logger_commands_telegram, REDIS,
+                InternalConf.redis_twilio_snooze_key,
+                InternalConf.redis_periodic_alive_reminder_mute_key,
+                InternalConf.redis_node_monitor_alive_key_prefix,
+                InternalConf.redis_network_monitor_alive_key_prefix
+            ).start_listening()
+        except Exception as e:
+            full_channel_set.alert_error(
+                TerminatedDueToExceptionAlert(monitor_name, e))
+        log_and_print('{} stopped.'.format(monitor_name))
def run_monitor_github(repo_config: RepoConfig):
@@ -196,7 +208,7 @@ def run_monitor_github(repo_config: RepoConfig):
# Initialise monitor
github_monitor = GitHubMonitor(
- monitor_name, channel_set, logger_monitor_github, REDIS,
+ monitor_name, full_channel_set, logger_monitor_github, REDIS,
repo_config.repo_name, releases_page,
InternalConf.redis_github_releases_key_prefix)
except Exception as e:
@@ -204,16 +216,37 @@ def run_monitor_github(repo_config: RepoConfig):
log_and_print(msg)
raise InitialisationException(msg)
- # Start
- log_and_print('{} started.'.format(monitor_name))
- sys.stdout.flush()
- try:
- start_github_monitor(github_monitor,
- InternalConf.github_monitor_period_seconds,
- logger_monitor_github)
- except Exception as e:
- channel_set.alert_error(TerminatedDueToExceptionAlert(monitor_name, e))
- log_and_print('{} stopped.'.format(monitor_name))
+    while True:
+        # Start
+        log_and_print('{} started.'.format(monitor_name))
+        sys.stdout.flush()
+        try:
+            start_github_monitor(github_monitor,
+                                 InternalConf.github_monitor_period_seconds,
+                                 logger_monitor_github)
+        except Exception as e:
+            full_channel_set.alert_error(
+                TerminatedDueToExceptionAlert(monitor_name, e))
+        log_and_print('{} stopped.'.format(monitor_name))
+
+
+def run_periodic_alive_reminder():
+    if not UserConf.periodic_alive_reminder_enabled:
+        return
+
+    name = "Periodic alive reminder"
+
+    while True:
+        log_and_print('{} started.'.format(name))
+        try:
+            periodic_alive_reminder(
+                UserConf.interval_seconds,
+                periodic_alive_reminder_channel_set,
+                InternalConf.redis_periodic_alive_reminder_mute_key, REDIS)
+        except Exception as e:
+            periodic_alive_reminder_channel_set.alert_error(
+                TerminatedDueToExceptionAlert(name, e))
+        log_and_print('{} stopped.'.format(name))
if __name__ == '__main__':
@@ -240,10 +273,16 @@ def run_monitor_github(repo_config: RepoConfig):
# Alerters initialisation
alerter_name = 'P.A.N.I.C.'
- channel_set = get_full_channel_set(
+    full_channel_set = get_full_channel_set(
alerter_name, logger_general, REDIS, log_file_alerts)
- log_and_print('Enabled alerting channels: {}'.format(
- channel_set.enabled_channels_list()))
+    log_and_print('Enabled alerting channels (general): {}'.format(
+        full_channel_set.enabled_channels_list()))
+    periodic_alive_reminder_channel_set = \
+        get_periodic_alive_reminder_channel_set(alerter_name, logger_general,
+                                                REDIS, log_file_alerts)
+    log_and_print('Enabled alerting channels (periodic alive reminder): {}'
+                  ''.format(periodic_alive_reminder_channel_set.
+                            enabled_channels_list()))
sys.stdout.flush()
# Nodes initialisation
@@ -280,11 +319,14 @@ def run_monitor_github(repo_config: RepoConfig):
monitor_network_count = len(unique_networks)
monitor_github_count = len(UserConf.filtered_repos)
commands_telegram_count = 1
+ periodic_alive_reminder_count = 1
total_count = sum([monitor_node_count, monitor_network_count,
- monitor_github_count, commands_telegram_count])
+ monitor_github_count, commands_telegram_count,
+ periodic_alive_reminder_count])
with concurrent.futures.ThreadPoolExecutor(max_workers=total_count) \
as executor:
executor.map(run_monitor_nodes, node_monitor_nodes)
executor.map(run_monitor_network, nodes_by_network.items())
executor.map(run_monitor_github, UserConf.filtered_repos)
executor.submit(run_commands_telegram)
+ executor.submit(run_periodic_alive_reminder)
diff --git a/run_util_update_to_v1.1.0.py b/run_util_update_to_v1.1.0.py
new file mode 100644
index 0000000..19f71df
--- /dev/null
+++ b/run_util_update_to_v1.1.0.py
@@ -0,0 +1,39 @@
+import os
+from configparser import ConfigParser
+
+
+def main():
+    if not os.path.isfile('config/user_config_main.ini'):
+        print('User config does not exist, so there is no need to update it.')
+        print('To create this file, you can run the setup (run_setup.py).')
+        return
+
+    cp = ConfigParser()
+    cp.read('config/user_config_main.ini')
+
+    # Create periodic_alive_reminder section
+    if 'periodic_alive_reminder' in cp:
+        print('Periodic alive reminder config was ALREADY UPDATED.')
+    else:
+        cp.add_section('periodic_alive_reminder')
+        cp['periodic_alive_reminder']['enabled'] = str(False)
+        cp['periodic_alive_reminder']['interval_seconds'] = ''
+        cp['periodic_alive_reminder']['email_enabled'] = ''
+        cp['periodic_alive_reminder']['telegram_enabled'] = ''
+        print('Periodic alive reminder config UPDATED.')
+
+    # Set new SMTP user and pass to blank
+    if 'user' in cp['email_alerts'] and 'pass' in cp['email_alerts']:
+        print('User and pass in email_alerts config were ALREADY UPDATED.')
+    else:
+        cp['email_alerts']['user'] = ''
+        cp['email_alerts']['pass'] = ''
+        print('User and pass in email_alerts config UPDATED (set to blank).')
+
+    with open('config/user_config_main.ini', 'w') as f:
+        cp.write(f)
+    print('Update process finished.')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/alerting/alert_utils/get_channel_set.py b/src/alerting/alert_utils/get_channel_set.py
index e69d62a..272a253 100644
--- a/src/alerting/alert_utils/get_channel_set.py
+++ b/src/alerting/alert_utils/get_channel_set.py
@@ -18,55 +18,97 @@
from src.utils.redis_api import RedisApi
-def get_full_channel_set(channel_name: str, logger_general: logging.Logger,
- redis: Optional[RedisApi], alerts_log_file: str,
- internal_conf: InternalConfig = InternalConf,
- user_conf: UserConfig = UserConf) -> ChannelSet:
+def _get_log_channel(alerts_log_file: str, channel_name: str,
+                     logger_general: logging.Logger,
+                     internal_conf: InternalConfig = InternalConf) \
+        -> LogChannel:
    # Logger initialisation
    logger_alerts = create_logger(alerts_log_file, 'alerts',
                                  internal_conf.logging_level)
+    return LogChannel(channel_name, logger_general, logger_alerts)
+
+
+def _get_console_channel(channel_name: str,
+                         logger_general: logging.Logger) -> ConsoleChannel:
+    return ConsoleChannel(channel_name, logger_general)
+
+
+def _get_telegram_channel(channel_name: str, logger_general: logging.Logger,
+                          redis: Optional[RedisApi],
+                          backup_channels_for_telegram: ChannelSet,
+                          user_conf: UserConfig = UserConf) -> TelegramChannel:
+    telegram_bot = TelegramBotApi(user_conf.telegram_alerts_bot_token,
+                                  user_conf.telegram_alerts_bot_chat_id)
+    telegram_channel = TelegramChannel(
+        channel_name, logger_general, redis,
+        telegram_bot, backup_channels_for_telegram)
+    return telegram_channel
+
+
+def _get_email_channel(channel_name: str, logger_general: logging.Logger,
+                       redis: Optional[RedisApi],
+                       user_conf: UserConfig = UserConf) -> EmailChannel:
+    email = EmailSender(user_conf.email_smtp, user_conf.email_from,
+                        user_conf.email_user, user_conf.email_pass)
+    email_channel = EmailChannel(channel_name, logger_general,
+                                 redis, email, user_conf.email_to)
+    return email_channel
+
+
+def _get_twilio_channel(channel_name: str, logger_general: logging.Logger,
+                        redis: Optional[RedisApi],
+                        backup_channels_for_twilio: ChannelSet,
+                        internal_conf: InternalConfig = InternalConf,
+                        user_conf: UserConfig = UserConf) -> TwilioChannel:
+    twilio = TwilioApi(user_conf.twilio_account_sid,
+                       user_conf.twilio_auth_token)
+    twilio_channel = TwilioChannel(channel_name, logger_general,
+                                   redis, twilio,
+                                   user_conf.twilio_phone_number,
+                                   user_conf.twilio_dial_numbers,
+                                   internal_conf.twiml_instructions_url,
+                                   internal_conf.redis_twilio_snooze_key,
+                                   backup_channels_for_twilio)
+    return twilio_channel
+
+
+def get_full_channel_set(channel_name: str, logger_general: logging.Logger,
+ redis: Optional[RedisApi], alerts_log_file: str,
+ internal_conf: InternalConfig = InternalConf,
+ user_conf: UserConfig = UserConf) -> ChannelSet:
# Initialise list of channels with default channels
channels = [
- ConsoleChannel(channel_name, logger_general),
- LogChannel(channel_name, logger_general, logger_alerts)
+ _get_console_channel(channel_name, logger_general),
+ _get_log_channel(alerts_log_file, channel_name, logger_general,
+ internal_conf)
]
# Initialise backup channel sets with default channels
backup_channels_for_telegram = ChannelSet(channels)
backup_channels_for_twilio = ChannelSet(channels)
- # Add telegram alerts to channel set
+ # Add telegram alerts to channel set if they are enabled from config file
if user_conf.telegram_alerts_enabled:
- telegram_bot = TelegramBotApi(user_conf.telegram_alerts_bot_token,
- user_conf.telegram_alerts_bot_chat_id)
- telegram_channel = TelegramChannel(channel_name, logger_general, redis,
- telegram_bot,
- backup_channels_for_telegram)
+ telegram_channel = _get_telegram_channel(
+ channel_name, logger_general, redis,
+ backup_channels_for_telegram, user_conf)
channels.append(telegram_channel)
else:
telegram_channel = None
- # Add email alerts to channel set
+ # Add email alerts to channel set if they are enabled from config file
if user_conf.email_alerts_enabled:
- email = EmailSender(user_conf.email_smtp, user_conf.email_from,
- user_conf.email_user, user_conf.email_pass)
- email_channel = EmailChannel(channel_name, logger_general,
- redis, email, user_conf.email_to)
+ email_channel = _get_email_channel(channel_name, logger_general,
+ redis, user_conf)
channels.append(email_channel)
else:
email_channel = None
- # Add twilio alerts to channel set
+ # Add twilio alerts to channel set if they are enabled from config file
if user_conf.twilio_alerts_enabled:
- twilio = TwilioApi(user_conf.twilio_account_sid,
- user_conf.twilio_auth_token)
- twilio_channel = TwilioChannel(channel_name, logger_general, redis,
- twilio, user_conf.twilio_phone_number,
- user_conf.twilio_dial_numbers,
- internal_conf.twiml_instructions_url,
- internal_conf.redis_twilio_snooze_key,
- backup_channels_for_twilio)
+ twilio_channel = _get_twilio_channel(channel_name, logger_general,
+ redis, backup_channels_for_twilio,
+ internal_conf, user_conf)
channels.append(twilio_channel)
else:
# noinspection PyUnusedLocal
@@ -82,3 +124,46 @@ def get_full_channel_set(channel_name: str, logger_general: logging.Logger,
backup_channels_for_twilio.add_channel(telegram_channel)
return ChannelSet(channels)
+
+
+def get_periodic_alive_reminder_channel_set(channel_name: str,
+                                            logger_general: logging.Logger,
+                                            redis: Optional[RedisApi],
+                                            alerts_log_file: str,
+                                            internal_conf:
+                                            InternalConfig = InternalConf,
+                                            user_conf: UserConfig = UserConf) \
+        -> ChannelSet:
+    # Initialise list of channels with default channels
+    channels = [
+        _get_console_channel(channel_name, logger_general),
+        _get_log_channel(alerts_log_file, channel_name, logger_general,
+                         internal_conf)
+    ]
+
+    # Initialise backup channel sets with default channels
+    backup_channels_for_telegram = ChannelSet(channels)
+
+    # Add telegram alerts to channel set if they are enabled from config file
+    if user_conf.telegram_alerts_enabled and \
+            user_conf.telegram_enabled:
+        telegram_channel = _get_telegram_channel(channel_name, logger_general,
+                                                 redis,
+                                                 backup_channels_for_telegram,
+                                                 user_conf)
+        channels.append(telegram_channel)
+
+    # Add email alerts to channel set if they are enabled from config file
+    if user_conf.email_alerts_enabled and \
+            user_conf.email_enabled:
+        email_channel = _get_email_channel(channel_name, logger_general,
+                                           redis, user_conf)
+        channels.append(email_channel)
+    else:
+        email_channel = None
+
+    # Set up email channel as backup channel for telegram
+    if email_channel is not None:
+        backup_channels_for_telegram.add_channel(email_channel)
+
+    return ChannelSet(channels)
diff --git a/src/alerting/alerts/alerts.py b/src/alerting/alerts/alerts.py
index c0202b2..ef3ae93 100644
--- a/src/alerting/alerts/alerts.py
+++ b/src/alerting/alerts/alerts.py
@@ -132,6 +132,15 @@ def __init__(self, node: str, danger: int) -> None:
''.format(node, danger, danger))
+class PeersIncreasedOutsideSafeRangeAlert(Alert):
+
+    def __init__(self, node: str, safe: int) -> None:
+        super().__init__(
+            '{} peers INCREASED to more than {} peers. No further peer change'
+            ' alerts will be sent unless the number of peers goes below {}.'
+            ''.format(node, safe, safe))
+
+
class PeersDecreasedAlert(Alert):
def __init__(self, node: str, old_peers: int, new_peers: int) -> None:
@@ -199,3 +208,9 @@ class ProblemWithTelegramBot(Alert):
def __init__(self, description: str) -> None:
super().__init__(
'Problem encountered with telegram bot: {}'.format(description))
+
+
+class AlerterAliveAlert(Alert):
+
+    def __init__(self) -> None:
+        super().__init__('Still running.')
diff --git a/src/alerting/channels/console.py b/src/alerting/channels/console.py
index 681a2b4..3f2f26c 100644
--- a/src/alerting/channels/console.py
+++ b/src/alerting/channels/console.py
@@ -1,8 +1,8 @@
import logging
import sys
-from src.alerting.channels.channel import Channel
from src.alerting.alerts.alerts import Alert
+from src.alerting.channels.channel import Channel
class ConsoleChannel(Channel):
diff --git a/src/alerting/channels/log.py b/src/alerting/channels/log.py
index 9e62fb6..ef60743 100644
--- a/src/alerting/channels/log.py
+++ b/src/alerting/channels/log.py
@@ -1,7 +1,7 @@
import logging
-from src.alerting.channels.channel import Channel
from src.alerting.alerts.alerts import Alert
+from src.alerting.channels.channel import Channel
class LogChannel(Channel):
diff --git a/src/alerting/channels/telegram.py b/src/alerting/channels/telegram.py
index 0590cf8..dc14caa 100644
--- a/src/alerting/channels/telegram.py
+++ b/src/alerting/channels/telegram.py
@@ -2,8 +2,8 @@
from typing import Optional
from src.alerting.alert_utils.telegram_bot_api import TelegramBotApi
-from src.alerting.channels.channel import Channel, ChannelSet
from src.alerting.alerts.alerts import Alert, ProblemWithTelegramBot
+from src.alerting.channels.channel import Channel, ChannelSet
from src.utils.redis_api import RedisApi
diff --git a/src/alerting/periodic/periodic.py b/src/alerting/periodic/periodic.py
new file mode 100644
index 0000000..b228c91
--- /dev/null
+++ b/src/alerting/periodic/periodic.py
@@ -0,0 +1,20 @@
+from datetime import timedelta
+from time import sleep
+
+from src.alerting.alerts.alerts import AlerterAliveAlert
+from src.alerting.channels.channel import ChannelSet
+from src.utils.redis_api import RedisApi
+
+
+def periodic_alive_reminder(interval: timedelta, channel_set: ChannelSet,
+                            mute_key: str, redis: RedisApi):
+    while True:
+        sleep(interval.total_seconds())
+        send_alive_alert(redis, mute_key, channel_set)
+
+
+def send_alive_alert(redis: RedisApi, mute_key: str,
+                     channel_set: ChannelSet) -> None:
+    # If reminder is not muted, inform operator that alerter is still alive.
+    if not redis.exists(mute_key):
+        channel_set.alert_info(AlerterAliveAlert())
diff --git a/src/commands/commands.py b/src/commands/commands.py
new file mode 100644
index 0000000..cd7d2e7
--- /dev/null
+++ b/src/commands/commands.py
@@ -0,0 +1,52 @@
+import logging
+from typing import Optional
+
+from src.utils.config_parsers.internal import InternalConfig
+from src.utils.config_parsers.internal_parsed import InternalConf
+from src.utils.config_parsers.user import UserConfig
+from src.utils.config_parsers.user_parsed import UserConf
+from src.utils.redis_api import RedisApi
+
+
+class Commands:
+
+    def __init__(self, logger: logging.Logger, redis: Optional[RedisApi],
+                 redis_snooze_key: Optional[str], redis_mute_key: Optional[str],
+                 redis_node_monitor_alive_key_prefix: Optional[str],
+                 redis_network_monitor_alive_key_prefix: Optional[str],
+                 internal_conf: InternalConfig = InternalConf,
+                 user_conf: UserConfig = UserConf) -> None:
+        self._logger = logger
+
+        self._redis = redis
+        self._redis_enabled = redis is not None
+        self._redis_snooze_key = redis_snooze_key
+        self._redis_mute_key = redis_mute_key
+        self._redis_node_monitor_alive_key_prefix = \
+            redis_node_monitor_alive_key_prefix
+        self._redis_network_monitor_alive_key_prefix = \
+            redis_network_monitor_alive_key_prefix
+
+        self._internal_conf = internal_conf
+        self._user_conf = user_conf
+
+    def snooze(self) -> None:
+        pass
+
+    def unsnooze(self) -> None:
+        pass
+
+    def mute(self) -> None:
+        pass
+
+    def unmute(self) -> None:
+        pass
+
+    def add_node(self) -> None:
+        pass
+
+    def remove_node(self) -> None:
+        pass
+
+    def current_nodes(self) -> None:
+        pass
diff --git a/src/commands/handler_utils/telegram_handler.py b/src/commands/handler_utils/telegram_handler.py
index 2f3609f..b8c8f28 100644
--- a/src/commands/handler_utils/telegram_handler.py
+++ b/src/commands/handler_utils/telegram_handler.py
@@ -1,8 +1,8 @@
from typing import Optional, List
-from telegram import Bot, Update
+from telegram import Update
from telegram.ext import Updater, CommandHandler, \
- Handler
+ Handler, CallbackContext
from src.alerting.alert_utils.telegram_bot_api import TelegramBotApi
@@ -15,7 +15,7 @@ def __init__(self, bot_token: str, authorised_chat_id: Optional[str],
self._authorised_chat_id = authorised_chat_id
# Set up updater
- self._updater = Updater(token=bot_token)
+ self._updater = Updater(token=bot_token, use_context=True)
# Set up handlers
ping_handler = CommandHandler('ping', self._ping_callback)
@@ -34,7 +34,7 @@ def start_handling(self, run_in_background: bool = False) -> None:
if not run_in_background:
self._updater.idle(stop_signals=[])
- def authorise(self, bot: Bot, update: Update) -> bool:
+ def authorise(self, update: Update, context: CallbackContext) -> bool:
if self._authorised_chat_id in [None, str(update.message.chat_id)]:
return True
else:
@@ -43,11 +43,11 @@ def authorise(self, bot: Bot, update: Update) -> bool:
api = TelegramBotApi(self._bot_token, self._authorised_chat_id)
api.send_message(
'Received command from unrecognised user: '
- 'bot={}, update={}'.format(bot, update))
+ 'update={}, context={}'.format(update, context))
return False
- def _ping_callback(self, bot: Bot, update: Update) -> None:
- if self.authorise(bot, update):
+ def _ping_callback(self, update: Update, context: CallbackContext) -> None:
+ if self.authorise(update, context):
update.message.reply_text('PONG!')
def stop(self) -> None:
diff --git a/src/commands/handlers/telegram.py b/src/commands/handlers/telegram.py
index f8afdee..f55fb53 100644
--- a/src/commands/handlers/telegram.py
+++ b/src/commands/handlers/telegram.py
@@ -1,11 +1,13 @@
import logging
-from datetime import datetime, timedelta
+from datetime import timedelta, datetime
from typing import Optional
from redis import RedisError
-from telegram import Update, Bot
-from telegram.ext import CommandHandler, MessageHandler, Filters
+from telegram import Update
+from telegram.ext import CommandHandler, MessageHandler, Filters, \
+ CallbackContext
+from src.commands.commands import Commands
from src.commands.handler_utils.telegram_handler import TelegramCommandHandler
from src.utils.config_parsers.internal import InternalConfig
from src.utils.config_parsers.internal_parsed import InternalConf
@@ -14,33 +16,27 @@
from src.utils.redis_api import RedisApi
-class TelegramCommands:
+class TelegramCommands(Commands):
def __init__(self, bot_token: str, authorised_chat_id: str,
logger: logging.Logger, redis: Optional[RedisApi],
- redis_snooze_key: Optional[str],
+ redis_snooze_key: Optional[str], redis_mute_key: Optional[str],
redis_node_monitor_alive_key_prefix: Optional[str],
redis_network_monitor_alive_key_prefix: Optional[str],
internal_conf: InternalConfig = InternalConf,
user_conf: UserConfig = UserConf) -> None:
- self._logger = logger
-
- self._redis = redis
- self._redis_enabled = redis is not None
- self._redis_snooze_key = redis_snooze_key
- self._redis_node_monitor_alive_key_prefix = \
- redis_node_monitor_alive_key_prefix
- self._redis_network_monitor_alive_key_prefix = \
- redis_network_monitor_alive_key_prefix
-
- self._internal_conf = internal_conf
- self._user_conf = user_conf
+ super().__init__(logger, redis, redis_snooze_key, redis_mute_key,
+ redis_node_monitor_alive_key_prefix,
+ redis_network_monitor_alive_key_prefix, internal_conf,
+ user_conf)
# Set up command handlers (command and respective callback function)
command_handlers = [
CommandHandler('start', self._start_callback),
CommandHandler('snooze', self._snooze_callback),
+ CommandHandler('mute', self._mute_callback),
+ CommandHandler('unmute', self._unmute_callback),
CommandHandler('unsnooze', self._unsnooze_callback),
CommandHandler('status', self._status_callback),
CommandHandler('validators', self._validators_callback),
@@ -63,21 +59,21 @@ def formatted_reply(update: Update, reply: str):
# Adds Markdown formatting
update.message.reply_text(reply, parse_mode='Markdown')
- def _start_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /start command: bot=%s, update=%s',
- bot, update)
+ def _start_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /start command: update=%s, context=%s',
+ update, context)
# If authorised, send welcome message
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
update.message.reply_text("Welcome to the P.A.N.I.C. alerter bot!\n"
"Type /help for more information.")
- def _snooze_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /snooze command: bot=%s, update=%s',
- bot, update)
+ def _snooze_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /snooze command: update=%s, context=%s',
+ update, context)
# If authorised, snooze phone calls if Redis enabled
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
if self._redis_enabled:
# Expected: /snooze <hours>
message_parts = update.message.text.split(' ')
@@ -104,12 +100,11 @@ def _snooze_callback(self, bot: Bot, update: Update):
update.message.reply_text('Snoozing is not available given '
'that Redis is not set up.')
- def _unsnooze_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /unsnooze command: bot=%s, update=%s',
- bot, update)
-
+ def _unsnooze_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /unsnooze command: update=%s, context=%s',
+ update, context)
# If authorised, unsnooze phone calls if Redis enabled
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
if self._redis_enabled:
# Remove snooze key if it exists
if self._redis.exists(self._redis_snooze_key):
@@ -122,12 +117,62 @@ def _unsnooze_callback(self, bot: Bot, update: Update):
update.message.reply_text('Unsnoozing is not available given '
'that Redis is not set up.')
- def _status_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /status command: bot=%s, update=%s',
- bot, update)
+    def _mute_callback(self, update: Update, context: CallbackContext):
+        self._logger.info('Received /mute command: update=%s, context=%s',
+                          update, context)
+
+        # If authorised, mute the periodic alive reminder if Redis enabled
+        if self.cmd_handler.authorise(update, context):
+            if self._redis_enabled:
+                # Expected: /mute <hours>
+                message_parts = update.message.text.split(' ')
+                if len(message_parts) == 2:
+                    try:
+                        # Get number of hours and set temporary Redis key
+                        hours = timedelta(hours=float(message_parts[1]))
+                        until = str(datetime.now() + hours)
+                        set_ret = self._redis.set_for(
+                            self._redis_mute_key, until, hours)
+                        if set_ret is None:
+                            update.message.reply_text(
+                                'Muting unsuccessful due to an issue with '
+                                'Redis. Check /status to see if it is online.')
+                        else:
+                            update.message.reply_text(
+                                'The periodic alive reminder has been muted for'
+                                ' {} hours until {}'.format(hours, until))
+                    except ValueError:
+                        update.message.reply_text('I expected a no. of hours.')
+                else:
+                    update.message.reply_text('I expected exactly one value.')
+            else:
+                update.message.reply_text('Muting is not available given '
+                                          'that Redis is not set up.')
+
+    def _unmute_callback(self, update: Update, context: CallbackContext):
+        self._logger.info('Received /unmute command: update=%s, context=%s',
+                          update, context)
+        # If authorised, unmute the periodic alive reminder if Redis enabled
+        if self.cmd_handler.authorise(update, context):
+            if self._redis_enabled:
+                # Remove mute key if it exists
+                if self._redis.exists(self._redis_mute_key):
+                    self._redis.remove(self._redis_mute_key)
+                    update.message.reply_text(
+                        'The periodic alive reminder has been unmuted.')
+                else:
+                    update.message.reply_text('The periodic alive reminder was '
+                                              'not muted.')
+            else:
+                update.message.reply_text('Unmuting is not available given '
+                                          'that Redis is not set up.')
+
+ def _status_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /status command: update=%s, context=%s',
+ update, context)
# If authorised, send status if Redis enabled
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
if self._redis_enabled:
status = ""
@@ -165,6 +210,17 @@ def _status_callback(self, bot: Bot, update: Update):
else:
status += '- Twilio calls are not snoozed.\n'
+                # Add periodic alive reminder mute state to status
+                if redis_running:
+                    if self._redis.exists(self._redis_mute_key):
+                        until = self._redis.get(
+                            self._redis_mute_key).decode("utf-8")
+                        status += '- The periodic alive reminder has been ' \
+                                  'muted until {}.\n'.format(until)
+                    else:
+                        status += '- The periodic alive reminder is not ' \
+                                  'muted.\n'
+
# Add node monitor latest updates to status
if redis_running:
node_monitor_keys_list = self._redis.get_keys(
@@ -206,11 +262,22 @@ def _status_callback(self, bot: Bot, update: Update):
if len(net_monitor_keys_list) == 0:
status += '- No recent update from network monitors.\n'
+                for name in net_monitor_names:
+                    redis_last_height_checked_key = \
+                        self._internal_conf. \
+                        redis_network_monitor_last_height_key_prefix + \
+                        name
+                    last_height_checked = self._redis.get(
+                        redis_last_height_checked_key).decode('utf-8')
+                    status += '- *{}* is currently in block height {}' \
+                              '.\n'.format(name, last_height_checked)
+
# If redis is not running
if not redis_running:
status += \
'- Since Redis is not accessible, Twilio calls are ' \
- 'considered not snoozed and any recent update from ' \
+ 'considered not snoozed, the periodic alive reminder ' \
+ 'is not muted, and any recent update from ' \
'node or network monitors is not accessible.\n'
# Send status
@@ -220,12 +287,12 @@ def _status_callback(self, bot: Bot, update: Update):
update.message.reply_text('Status update not available given '
'that Redis is not set up.')
- def _validators_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /validators command: bot=%s, update=%s',
- bot, update)
+ def _validators_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /validators command: update=%s, context=%s',
+ update, context)
# If authorised, send list of links to validators
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
update.message.reply_text(
'Links to validators:\n'
' Hubble: {}\n'
@@ -239,12 +306,12 @@ def _validators_callback(self, bot: Bot, update: Update):
self._internal_conf.validators_mintscan_link,
self._internal_conf.validators_lunie_link))
- def _block_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /block command: bot=%s, update=%s',
- bot, update)
+ def _block_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /block command: update=%s, context=%s',
+ update, context)
# If authorised, send list of links to specified block
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
# Expected: /block <height>
message_parts = update.message.text.split(' ')
if len(message_parts) == 2:
@@ -272,12 +339,12 @@ def _block_callback(self, bot: Bot, update: Update):
else:
update.message.reply_text("I expected exactly one value.")
- def _tx_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /tx command: bot=%s, update=%s',
- bot, update)
+ def _tx_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /tx command: update=%s, context=%s',
+ update, context)
# If authorised, send list of links to specified transaction
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
# Expected: /tx <tx-hash>
message_parts = update.message.text.split(' ')
if len(message_parts) == 2:
@@ -296,28 +363,34 @@ def _tx_callback(self, bot: Bot, update: Update):
else:
update.message.reply_text("I expected exactly one value.")
- def _help_callback(self, bot: Bot, update: Update):
- self._logger.info('Received /help command: bot=%s, update=%s',
- bot, update)
+ def _help_callback(self, update: Update, context: CallbackContext):
+ self._logger.info('Received /help command: update=%s, context=%s',
+ update, context)
# If authorised, send help message with available commands
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
update.message.reply_text(
'Hey! These are the available commands:\n'
' /start: welcome message\n'
' /ping: ping the Telegram commands handler\n'
' /snooze <hours>: snoozes phone calls for <hours>\n'
' /unsnooze: unsnoozes phone calls\n'
+ ' /mute <hours>: mute periodic alive reminder for <hours>\n'
+ ' /unmute: unmute periodic alive reminder\n'
' /status: shows status message\n'
' /validators: shows links to validators\n'
' /block <height>: shows link to specified block\n'
' /tx <tx-hash>: shows link to specified transaction\n'
' /help: shows this message')
- def _unknown_callback(self, bot: Bot, update: Update) -> None:
- self._logger.info('Received unrecognized command: bot=%s, update=%s',
- bot, update)
+ def _unknown_callback(self, update: Update,
+ context: CallbackContext) -> None:
+ self._logger.info(
+ 'Received unrecognized command: update=%s, context=%s',
+ update, context)
# If authorised, send a default message for unrecognized commands
- if self.cmd_handler.authorise(bot, update):
+ if self.cmd_handler.authorise(update, context):
update.message.reply_text('I did not understand (Type /help)')
+
+ # TODO: Need to add callbacks for add_node, remove_node, current_nodes
diff --git a/src/monitoring/monitor_utils/live_check.py b/src/monitoring/monitor_utils/live_check.py
index 66103b4..4dbb902 100644
--- a/src/monitoring/monitor_utils/live_check.py
+++ b/src/monitoring/monitor_utils/live_check.py
@@ -1,7 +1,7 @@
import logging
import requests
-from requests.exceptions import ConnectionError
+from requests.exceptions import ConnectionError as ReqConnectionError
def live_check_unsafe(endpoint: str, logger: logging.Logger) -> None:
@@ -14,5 +14,5 @@ def live_check(endpoint: str, logger: logging.Logger) -> bool:
try:
live_check_unsafe(endpoint, logger)
return True
- except ConnectionError:
+ except ReqConnectionError:
return False
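The alias matters because requests' ConnectionError is a distinct class from Python's built-in ConnectionError (it derives from RequestException, not from the builtin), so importing it unaliased shadows the builtin and invites subtle except-clause mistakes. A small sketch of the distinction; the URL simply points at a port with nothing listening:

    import requests
    from requests.exceptions import ConnectionError as ReqConnectionError

    # requests' ConnectionError is not a subclass of the builtin one,
    # so a bare `except ConnectionError` would not catch it
    assert not issubclass(ReqConnectionError, ConnectionError)

    try:
        requests.get('http://localhost:9', timeout=1)
    except ReqConnectionError:
        print('caught the requests-specific ConnectionError')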
diff --git a/src/monitoring/monitors/github.py b/src/monitoring/monitors/github.py
index d745ebb..fdd3858 100644
--- a/src/monitoring/monitors/github.py
+++ b/src/monitoring/monitors/github.py
@@ -1,8 +1,8 @@
import logging
from typing import Optional
-from src.alerting.channels.channel import ChannelSet
from src.alerting.alerts.alerts import NewGitHubReleaseAlert
+from src.alerting.channels.channel import ChannelSet
from src.monitoring.monitor_utils.get_json import get_json
from src.monitoring.monitors.monitor import Monitor
from src.utils.redis_api import RedisApi
@@ -56,6 +56,7 @@ def monitor(self) -> None:
releases = get_json(self.releases_page, self._logger)
# If response contains a message, skip monitoring this time round
+ # since the presence of a message indicates an error in the API call
if 'message' in releases:
self.logger.warning('GitHub message: %s', releases['message'])
return
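The comment added above reflects how the GitHub REST API reports errors: a successful releases query returns a JSON array, while an error (such as rate limiting) returns an object with a 'message' field. A hedged sketch of the check, using an illustrative repository:

    import requests

    releases = requests.get(
        'https://api.github.com/repos/cosmos/cosmos-sdk/releases').json()

    if isinstance(releases, dict) and 'message' in releases:
        # e.g. {'message': 'API rate limit exceeded for ...', ...}
        print('GitHub error: ' + releases['message'])
    else:
        print('Latest release: ' + releases[0]['tag_name'])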
diff --git a/src/monitoring/monitors/monitor_starters.py b/src/monitoring/monitors/monitor_starters.py
index ff0e7b9..314ccd4 100644
--- a/src/monitoring/monitors/monitor_starters.py
+++ b/src/monitoring/monitors/monitor_starters.py
@@ -4,7 +4,8 @@
from json import JSONDecodeError
import urllib3.exceptions
-from requests.exceptions import ConnectionError, ReadTimeout
+from requests.exceptions import ConnectionError as ReqConnectionError, \
+ ReadTimeout
from src.alerting.alerts.alerts import CouldNotFindLiveFullNodeAlert, \
ErrorWhenReadingDataFromNode, CannotAccessGitHubPageAlert
@@ -25,12 +26,10 @@ def start_node_monitor(node_monitor: NodeMonitor, monitor_period: int,
logger.debug('Reading %s.', node_monitor.node)
node_monitor.monitor()
logger.debug('Done reading %s.', node_monitor.node)
- except ConnectionError as conn_err:
- node_monitor.node.set_as_down(node_monitor.channels,
- conn_err, logger)
- except ReadTimeout as read_timeout:
- node_monitor.node.set_as_down(node_monitor.channels,
- read_timeout, logger)
+ except (ReqConnectionError, ReadTimeout):
+ node_monitor.node.set_as_down(node_monitor.channels, logger)
except (urllib3.exceptions.IncompleteRead,
http.client.IncompleteRead) as incomplete_read:
logger.error('Error when reading data from {}: {}. '
@@ -61,9 +60,9 @@ def start_network_monitor(network_monitor: NetworkMonitor, monitor_period: int,
except NoLiveFullNodeException:
network_monitor.channels.alert_major(
CouldNotFindLiveFullNodeAlert(network_monitor.monitor_name))
- except (ConnectionError, ReadTimeout) as conn_err:
+ except (ReqConnectionError, ReadTimeout):
network_monitor.last_full_node_used.set_as_down(
- network_monitor.channels, conn_err, logger)
+ network_monitor.channels, logger)
except (urllib3.exceptions.IncompleteRead,
http.client.IncompleteRead) as incomplete_read:
network_monitor.channels.alert_error(ErrorWhenReadingDataFromNode(
@@ -78,8 +77,9 @@ def start_network_monitor(network_monitor: NetworkMonitor, monitor_period: int,
network_monitor.save_state()
# Sleep
- logger.debug('Sleeping for %s seconds.', monitor_period)
- time.sleep(monitor_period)
+ if not network_monitor.is_syncing():
+ logger.debug('Sleeping for %s seconds.', monitor_period)
+ time.sleep(monitor_period)
def start_github_monitor(github_monitor: GitHubMonitor, monitor_period: int,
@@ -101,7 +101,7 @@ def start_github_monitor(github_monitor: GitHubMonitor, monitor_period: int,
# Reset alert limiter
github_error_alert_limiter.reset()
- except (ConnectionError, ReadTimeout) as conn_err:
+ except (ReqConnectionError, ReadTimeout) as conn_err:
if github_error_alert_limiter.can_do_task():
github_monitor.channels.alert_error(
CannotAccessGitHubPageAlert(github_monitor.releases_page))
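The github_error_alert_limiter used above (and the TimedTaskLimiter exercised by the new tests further down) is, in essence, a wall-clock rate limiter. A minimal sketch, assuming only the can_do_task/did_task/reset interface that this patch relies on:

    from datetime import datetime, timedelta

    class TimedTaskLimiter:
        """Allows a task to be performed at most once per time interval."""

        def __init__(self, time_interval: timedelta) -> None:
            self._time_interval = time_interval
            self._last_did_task = datetime.min

        def can_do_task(self) -> bool:
            return datetime.now() >= self._last_did_task + self._time_interval

        def did_task(self) -> None:
            self._last_did_task = datetime.now()

        def reset(self) -> None:
            self._last_did_task = datetime.min

    limiter = TimedTaskLimiter(timedelta(seconds=3600))
    if limiter.can_do_task():
        print('alerting (placeholder for CannotAccessGitHubPageAlert)')
        limiter.did_task()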
diff --git a/src/monitoring/monitors/network.py b/src/monitoring/monitors/network.py
index 6278f71..12fb66d 100644
--- a/src/monitoring/monitors/network.py
+++ b/src/monitoring/monitors/network.py
@@ -1,6 +1,5 @@
import logging
from datetime import datetime, timedelta
-from time import sleep
from typing import List, Optional
import dateutil
@@ -17,15 +16,20 @@
class NetworkMonitor(Monitor):
def __init__(self, monitor_name: str, channels: ChannelSet,
- logger: logging.Logger, redis: Optional[RedisApi],
- all_full_nodes: List[Node], all_validators: List[Node]):
+ logger: logging.Logger,
+ network_monitor_max_catch_up_blocks: int,
+ redis: Optional[RedisApi], all_full_nodes: List[Node],
+ all_validators: List[Node]):
super().__init__(monitor_name, channels, logger, redis)
+ self.network_monitor_max_catch_up_blocks = \
+ network_monitor_max_catch_up_blocks
self._all_full_nodes = all_full_nodes
self._all_validators = all_validators
self.last_full_node_used = None
self._last_height_checked = None
+ self._monitor_is_syncing = False
self._redis_alive_key_timeout = \
self._internal_conf.redis_network_monitor_alive_key_timeout
@@ -38,6 +42,9 @@ def __init__(self, monitor_name: str, channels: ChannelSet,
self.load_state()
+ def is_syncing(self) -> bool:
+ return self._monitor_is_syncing
+
def load_state(self) -> None:
# If Redis is enabled, load the last height checked, if any
if self.redis_enabled:
@@ -75,6 +82,41 @@ def node(self) -> Node:
return n
raise NoLiveFullNodeException()
+ def _check_block(self, height: int) -> None:
+ self._logger.info('%s obtaining data at height %s',
+ self._monitor_name, height)
+
+ # Get block
+ block = get_cosmos_json(self.node.rpc_url + '/block?height=' +
+ str(height), self._logger)
+
+ # Get validators participating in the precommits of last commit
+ block_precommits = block['block']['last_commit']['precommits']
+ non_null_precommits = filter(lambda p: p, block_precommits)
+ block_precommits_validators = set(
+ map(lambda p: p['validator_address'], non_null_precommits))
+ total_no_of_missing_validators = \
+ len(block_precommits) - len(block_precommits_validators)
+
+ self._logger.debug('Precommit validators: %s',
+ block_precommits_validators)
+ self._logger.debug('Total missing validators: %s',
+ total_no_of_missing_validators)
+
+ # Call method based on whether block missed or not
+ for v in self._all_validators:
+ if v.pubkey not in block_precommits_validators:
+ block_time = block['block']['header']['time']
+ v.add_missed_block(
+ height - 1, # '- 1' since it's actually previous height
+ dateutil.parser.parse(block_time, ignoretz=True),
+ total_no_of_missing_validators, self.channels,
+ self.logger)
+ else:
+ v.clear_missed_blocks(self.channels, self.logger)
+
+ self._logger.debug('Moving to next height.')
+
def monitor(self) -> None:
# Get abci_info and, from that, the last height to be checked
abci_info = get_cosmos_json(self.node.rpc_url + '/abci_info',
@@ -87,47 +129,17 @@ def monitor(self) -> None:
# Consider any height that is after the previous last height
height = self._last_height_checked + 1
- while height <= last_height_to_check:
- self._logger.info('%s obtaining data at height %s',
- self._monitor_name, height)
-
- # Get block
- block = get_cosmos_json(self.node.rpc_url + '/block?height=' +
- str(height), self._logger)
-
- # Get validators participating in the precommits of last commit
- block_precommits = block['block']['last_commit']['precommits']
- non_null_precommits = filter(lambda p: p, block_precommits)
- block_precommits_validators = set(
- map(lambda p: p['validator_address'], non_null_precommits))
- total_no_of_missing_validators = \
- len(block_precommits) - len(block_precommits_validators)
-
- self._logger.debug('Precommit validators: %s',
- block_precommits_validators)
- self._logger.debug('Total missing validators: %s',
- total_no_of_missing_validators)
-
- # Call method based on whether block missed or not
- for v in self._all_validators:
- if v.pubkey not in block_precommits_validators:
- block_time = block['block']['header']['time']
- v.add_missed_block(
- height - 1, # '- 1' since it's actually previous height
- dateutil.parser.parse(block_time, ignoretz=True),
- total_no_of_missing_validators, self.channels,
- self.logger)
- else:
- v.clear_missed_blocks(self.channels, self.logger)
-
- self._logger.debug('Moving to next height.')
-
- # Move to next block
- height += 1
-
- # If there is a next height to check, sleep for a bit
- if height <= last_height_to_check:
- self.logger.debug('Sleeping for 0.5 second between heights.')
- sleep(0.5)
-
- self._last_height_checked = last_height_to_check
+ if last_height_to_check - self._last_height_checked > \
+ self.network_monitor_max_catch_up_blocks:
+ height = last_height_to_check - \
+ self.network_monitor_max_catch_up_blocks
+ self._check_block(height)
+ self._last_height_checked = height
+ elif height <= last_height_to_check:
+ self._check_block(height)
+ self._last_height_checked = height
+
+ self._monitor_is_syncing = \
+ last_height_to_check - self._last_height_checked > 2
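The reworked monitor now checks a single block per iteration and jumps ahead once the backlog exceeds network_monitor_max_catch_up_blocks; sleeping between iterations is also skipped while syncing (see monitor_starters.py above). A worked sketch of the height selection, with illustrative numbers:

    def next_height_to_check(last_checked: int, chain_height: int,
                             max_catch_up: int) -> int:
        # Jump forward when the backlog exceeds the catch-up limit,
        # otherwise advance one block at a time
        if chain_height - last_checked > max_catch_up:
            return chain_height - max_catch_up
        return last_checked + 1

    # 1000-block backlog with a 500-block limit: skip ahead to 99500
    print(next_height_to_check(99000, 100000, 500))  # 99500
    # 2-block backlog: simply move to the next height
    print(next_height_to_check(99998, 100000, 500))  # 99999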
diff --git a/src/node/node.py b/src/node/node.py
index f8b182e..71a400d 100644
--- a/src/node/node.py
+++ b/src/node/node.py
@@ -11,7 +11,8 @@
VotingPowerDecreasedAlert, VotingPowerIncreasedAlert, \
VotingPowerIncreasedByAlert, VotingPowerDecreasedByAlert, \
IsCatchingUpAlert, IsNoLongerCatchingUpAlert, PeersIncreasedAlert, \
- PeersDecreasedAlert, PeersIncreasedOutsideDangerRangeAlert
+ PeersDecreasedAlert, PeersIncreasedOutsideDangerRangeAlert, \
+ PeersIncreasedOutsideSafeRangeAlert
from src.alerting.channels.channel import ChannelSet
from src.utils.config_parsers.internal import InternalConfig
from src.utils.config_parsers.internal_parsed import InternalConf
@@ -49,6 +50,8 @@ def __init__(self, name: str, rpc_url: Optional[str], node_type: NodeType,
self._validator_peer_danger_boundary = \
internal_conf.validator_peer_danger_boundary
+ self._validator_peer_safe_boundary = \
+ internal_conf.validator_peer_safe_boundary
self._full_node_peer_danger_boundary = \
internal_conf.full_node_peer_danger_boundary
self._missed_blocks_danger_boundary = \
@@ -145,8 +148,7 @@ def save_state(self, logger: logging.Logger) -> None:
self._redis_prefix + '_no_of_peers': self._no_of_peers
})
- def set_as_down(self, channels: ChannelSet, error: Exception,
- logger: logging.Logger) -> None:
+ def set_as_down(self, channels: ChannelSet, logger: logging.Logger) -> None:
logger.debug('%s set_as_down: is_down(currently)=%s, channels=%s',
self, self.is_down, channels)
@@ -291,12 +293,11 @@ def set_catching_up(self, now_catching_up: bool,
'%s set_catching_up: before=%s, new=%s, channels=%s',
self, self.catching_up, now_catching_up, channels)
- # Alert if catching up has changed for validator
- if self.is_validator:
- if not self.catching_up and now_catching_up:
- channels.alert_minor(IsCatchingUpAlert(self.name))
- elif self.catching_up and not now_catching_up:
- channels.alert_info(IsNoLongerCatchingUpAlert(self.name))
+ # Alert if catching up has changed
+ if not self.catching_up and now_catching_up:
+ channels.alert_minor(IsCatchingUpAlert(self.name))
+ elif self.catching_up and not now_catching_up:
+ channels.alert_info(IsNoLongerCatchingUpAlert(self.name))
# Update catching-up
self._catching_up = now_catching_up
@@ -311,24 +312,35 @@ def set_no_of_peers(self, new_no_of_peers: int, channels: ChannelSet,
# Variable alias for improved readability
if self.is_validator:
danger = self._validator_peer_danger_boundary
+ safe = self._validator_peer_safe_boundary
else:
danger = self._full_node_peer_danger_boundary
+ safe = None
# Alert if number of peers has changed
if self.no_of_peers not in [None, new_no_of_peers]:
if self.is_validator:
- if new_no_of_peers > self.no_of_peers: # increase
- channels.alert_info(PeersIncreasedAlert(
- self.name, self.no_of_peers, new_no_of_peers))
- elif new_no_of_peers > danger: # decrease outside danger range
- channels.alert_minor(PeersDecreasedAlert(
- self.name, self.no_of_peers, new_no_of_peers))
- else: # decrease inside danger range
- channels.alert_major(PeersDecreasedAlert(
- self.name, self.no_of_peers, new_no_of_peers))
+ if new_no_of_peers <= safe:
+ # at or below the safe boundary
+ if new_no_of_peers > self.no_of_peers: # increase
+ channels.alert_info(PeersIncreasedAlert(
+ self.name, self.no_of_peers, new_no_of_peers))
+ elif new_no_of_peers > danger:
+ # decrease outside danger range
+ channels.alert_minor(PeersDecreasedAlert(
+ self.name, self.no_of_peers, new_no_of_peers))
+ else: # decrease inside danger range
+ channels.alert_major(PeersDecreasedAlert(
+ self.name, self.no_of_peers, new_no_of_peers))
+ elif self.no_of_peers <= safe < new_no_of_peers:
+ # increase outside safe range for the first time
+ channels.alert_info(
+ PeersIncreasedOutsideSafeRangeAlert(self.name, safe))
else:
if new_no_of_peers > self.no_of_peers: # increase
- if new_no_of_peers < danger: # increase inside danger range
+ if new_no_of_peers <= danger:
+ # increase inside danger range
channels.alert_info(PeersIncreasedAlert(
self.name, self.no_of_peers, new_no_of_peers))
elif self.no_of_peers <= danger < new_no_of_peers:
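Taken together, the new branches give validator peer changes a three-tier scheme: routine increase/decrease alerts fire only at or below the safe boundary, decreases inside the danger range escalate to major, and the only alert above the safe boundary is the one-off "increased outside safe range" info. A condensed sketch of the decision table, assuming safe > danger as the config check further below enforces:

    from typing import Optional

    def validator_peer_alert(old: int, new: int, safe: int,
                             danger: int) -> Optional[str]:
        if old == new:
            return None
        if new <= safe:  # at or below the safe boundary
            if new > old:
                return 'info: peers increased'
            if new > danger:
                return 'minor: peers decreased outside danger range'
            return 'major: peers decreased inside danger range'
        if old <= safe < new:
            return 'info: peers increased outside safe range'
        return None  # comfortably above the safe boundary

    assert validator_peer_alert(5, 4, safe=3, danger=1) is None
    assert validator_peer_alert(3, 2, safe=3, danger=1).startswith('minor')
    assert validator_peer_alert(2, 1, safe=3, danger=1).startswith('major')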
diff --git a/src/setup/setup_user_config_main.py b/src/setup/setup_user_config_main.py
index 5438f5d..327c5f3 100644
--- a/src/setup/setup_user_config_main.py
+++ b/src/setup/setup_user_config_main.py
@@ -1,10 +1,12 @@
from configparser import ConfigParser
+from datetime import timedelta
from src.alerting.alert_utils.telegram_bot_api import TelegramBotApi
from src.alerting.alert_utils.twilio_api import TwilioApi
from src.setup.setup_user_config_main_tests import test_telegram_alerts, \
TestOutcome, test_email_alerts, test_twilio_alerts, \
test_telegram_commands, test_redis
+from src.utils.datetime import strfdelta
from src.utils.user_input import yn_prompt
@@ -237,7 +239,7 @@ def setup_twilio_alerts(cp: ConfigParser) -> None:
cp['twilio_alerts']['phone_numbers_to_dial'] = to_dial
-def setup_alerts(cp: ConfigParser) -> None:
+def setup_alert_channels(cp: ConfigParser) -> None:
print('==== Alerts')
print('By default, alerts are output to a log file and to '
'the console. Let\'s set up the rest of the alerts.')
@@ -350,12 +352,90 @@ def setup_redis(cp: ConfigParser) -> None:
cp['redis']['password'] = password
+def setup_periodic_alerts(cp: ConfigParser) -> None:
+ print('==== Periodic alerts')
+ setup_periodic_alive_reminder(cp)
+
+
+def setup_periodic_alive_reminder(cp: ConfigParser) -> None:
+ print('---- Periodic alive reminder')
+ print('The periodic alive reminder is a way for the alerter to inform its '
+ 'users that it is still running.')
+
+ already_set_up = is_already_set_up(cp, 'periodic_alive_reminder')
+ if already_set_up and \
+ not yn_prompt('The periodic alive reminder is already set up. '
+ 'Do you wish to clear the current config? (Y/n)\n'):
+ return
+
+ reset_section('periodic_alive_reminder', cp)
+ cp['periodic_alive_reminder']['enabled'] = str(False)
+ cp['periodic_alive_reminder']['interval_seconds'] = ''
+ cp['periodic_alive_reminder']['email_enabled'] = ''
+ cp['periodic_alive_reminder']['telegram_enabled'] = ''
+
+ if not already_set_up and \
+ not yn_prompt('Do you wish to set up the periodic alive reminder? '
+ '(Y/n)\n'):
+ return
+
+ interval = input("Please enter the amount of seconds you want to "
+ "pass for the periodic alive reminder. Make sure that "
+ "you insert a positive integer.\n")
+ while True:
+ try:
+ interval_number_rep = int(interval)
+ except ValueError:
+ interval = input("Input is not a valid integer. Please enter "
+ "another value\n")
+ continue
+ if interval_number_rep > 0:
+ time = timedelta(seconds=int(interval_number_rep))
+ time = strfdelta(time, "{hours}h {minutes}m {seconds}s")
+ if yn_prompt(
+ 'You will be reminded that the alerter is still running '
+ 'after ' + time + ". Do you want to confirm this (Y/n) \n"):
+ break
+ else:
+ interval = input(
+ "Please enter the amount of seconds you want to "
+ "pass for the periodic alive reminder. Make sure that "
+ "you insert a positive integer.\n")
+ else:
+ interval = input("Input is not a positive integer. Please enter "
+ "another value\n")
+
+ if is_already_set_up(cp, 'email_alerts') and \
+ cp['email_alerts'].getboolean('enabled') and \
+ yn_prompt('Would you like the periodic alive reminder '
+ 'to send alerts via e-mail? (Y/n)\n'):
+ email_enabled = str(True)
+ else:
+ email_enabled = str(False)
+
+ if is_already_set_up(cp, 'telegram_alerts') and \
+ cp['telegram_alerts'].getboolean('enabled') and \
+ yn_prompt('Would you like the periodic alive reminder '
+ 'to send alerts via Telegram? (Y/n)\n'):
+ telegram_enabled = str(True)
+ else:
+ telegram_enabled = str(False)
+
+ cp['periodic_alive_reminder']['enabled'] = str(True)
+ cp['periodic_alive_reminder']['interval_seconds'] = interval
+ cp['periodic_alive_reminder']['email_enabled'] = email_enabled
+ cp['periodic_alive_reminder']['telegram_enabled'] = telegram_enabled
+
+
def setup_all(cp: ConfigParser) -> None:
setup_general(cp)
print()
- setup_alerts(cp)
+ setup_alert_channels(cp)
+ print()
+ setup_periodic_alerts(cp)
print()
setup_commands(cp)
print()
setup_redis(cp)
+ print()
print('Setup finished.')
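strfdelta, imported at the top of this file, renders a timedelta through a template string such as the "{hours}h {minutes}m {seconds}s" used above. A plausible sketch of such a helper; the project's actual implementation may differ:

    from datetime import timedelta

    def strfdelta(tdelta: timedelta, fmt: str) -> str:
        # Split the delta into days/hours/minutes/seconds and substitute
        parts = {'days': tdelta.days}
        parts['hours'], rem = divmod(tdelta.seconds, 3600)
        parts['minutes'], parts['seconds'] = divmod(rem, 60)
        return fmt.format(**parts)

    print(strfdelta(timedelta(seconds=3725), '{hours}h {minutes}m {seconds}s'))
    # -> 1h 2m 5s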
diff --git a/src/setup/setup_user_config_nodes.py b/src/setup/setup_user_config_nodes.py
index fbc1ea7..3beb21d 100644
--- a/src/setup/setup_user_config_nodes.py
+++ b/src/setup/setup_user_config_nodes.py
@@ -78,7 +78,6 @@ def setup_nodes(cp: ConfigParser) -> None:
break
# Add nodes to config
- cp.clear()
for i, node in enumerate(nodes):
section = 'node_' + str(i)
cp.add_section(section)
diff --git a/src/setup/setup_user_config_repos.py b/src/setup/setup_user_config_repos.py
index 3cc0100..11c2624 100644
--- a/src/setup/setup_user_config_repos.py
+++ b/src/setup/setup_user_config_repos.py
@@ -72,7 +72,6 @@ def setup_repos(cp: ConfigParser) -> None:
break
# Add repos to config
- cp.clear()
for i, repo in enumerate(repos):
section = 'repo_' + str(i)
cp.add_section(section)
diff --git a/src/utils/config_parsers/internal.py b/src/utils/config_parsers/internal.py
index f213f8f..e3e583c 100644
--- a/src/utils/config_parsers/internal.py
+++ b/src/utils/config_parsers/internal.py
@@ -1,4 +1,5 @@
import configparser
+import sys
from datetime import timedelta
from src.utils.config_parsers.config_parser import ConfigParser
@@ -36,7 +37,7 @@ def __init__(self, config_file_path: str) -> None:
# [redis]
section = cp['redis']
self.redis_database = int(section['redis_database'])
- self.redis_test_database = int(section['redis_rest_database'])
+ self.redis_test_database = int(section['redis_test_database'])
self.redis_twilio_snooze_key = section['redis_twilio_snooze_key']
self.redis_github_releases_key_prefix = section[
@@ -47,6 +48,8 @@ def __init__(self, config_file_path: str) -> None:
'redis_network_monitor_alive_key_prefix']
self.redis_network_monitor_last_height_key_prefix = section[
'redis_network_monitor_last_height_key_prefix']
+ self.redis_periodic_alive_reminder_mute_key = \
+ section['redis_periodic_alive_reminder_mute_key']
self.redis_node_monitor_alive_key_timeout = int(
section['redis_node_monitor_alive_key_timeout'])
@@ -59,6 +62,8 @@ def __init__(self, config_file_path: str) -> None:
section['node_monitor_period_seconds'])
self.network_monitor_period_seconds = int(
section['network_monitor_period_seconds'])
+ self.network_monitor_max_catch_up_blocks = int(
+ section['network_monitor_max_catch_up_blocks'])
self.github_monitor_period_seconds = int(
section['github_monitor_period_seconds'])
@@ -72,6 +77,9 @@ def __init__(self, config_file_path: str) -> None:
section['max_missed_blocks_in_time_interval'])
self.validator_peer_danger_boundary = int(
section['validator_peer_danger_boundary'])
+ self.validator_peer_safe_boundary = int(
+ section['validator_peer_safe_boundary'])
+ self._check_if_peer_safe_and_danger_boundaries_are_valid()
self.full_node_peer_danger_boundary = int(
section['full_node_peer_danger_boundary'])
self.missed_blocks_danger_boundary = int(
@@ -100,3 +108,17 @@ def __init__(self, config_file_path: str) -> None:
self.tx_mintscan_link_prefix = section['tx_mintscan_link_prefix']
self.github_releases_template = section['github_releases_template']
+
+ # Safe boundary must be greater than danger boundary at all times for
+ # correct execution
+ def _peer_safe_and_danger_boundaries_are_valid(self) -> bool:
+ return self.validator_peer_safe_boundary > \
+ self.validator_peer_danger_boundary > 0
+
+ def _check_if_peer_safe_and_danger_boundaries_are_valid(self) -> None:
+ if not self._peer_safe_and_danger_boundaries_are_valid():
+ print("validator_peer_safe_boundary must be STRICTLY GREATER than "
+ "validator_peer_danger_boundary for correct execution. "
+ "\nPlease make the necessary modifications in the "
+ "config/internal_config.ini file and restart the alerter.")
+ sys.exit(-1)
diff --git a/src/utils/config_parsers/user.py b/src/utils/config_parsers/user.py
index c7360b9..0eb2f75 100644
--- a/src/utils/config_parsers/user.py
+++ b/src/utils/config_parsers/user.py
@@ -1,4 +1,5 @@
import configparser
+from datetime import timedelta
from src.utils.config_parsers.config_parser import ConfigParser
@@ -80,6 +81,19 @@ def __init__(self, alerting_config_file_path: str,
self.redis_port = cp['redis']['port']
self.redis_password = cp['redis']['password']
+ # [periodic_alive_reminder]
+ self.periodic_alive_reminder_enabled = to_bool(
+ cp['periodic_alive_reminder']['enabled'])
+ self.interval_seconds = timedelta(
+ seconds=int(cp['periodic_alive_reminder']['interval_seconds'])) \
+ if self.periodic_alive_reminder_enabled else None
+ self.email_enabled = to_bool(
+ cp['periodic_alive_reminder']['email_enabled']) \
+ if self.periodic_alive_reminder_enabled else None
+ self.telegram_enabled = to_bool(
+ cp['periodic_alive_reminder']['telegram_enabled']) \
+ if self.periodic_alive_reminder_enabled else None
+
# ------------------------ Nodes Config
# [node_...]
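to_bool, used above, is needed because ConfigParser hands back raw strings ('True'/'False'), both of which are truthy. A minimal sketch of such a helper, assuming it accepts the usual truthy spellings:

    def to_bool(bool_str: str) -> bool:
        # ConfigParser values are strings, so 'False' would otherwise be truthy
        return bool_str.lower() in ['true', 'yes', 'y']

    assert to_bool('True') and not to_bool('False')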
diff --git a/test/__init__.py b/test/__init__.py
index 0ecb7b4..c66ecd7 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -1,6 +1,12 @@
+import os
+
from src.utils.config_parsers.internal import InternalConfig
from src.utils.config_parsers.user import UserConfig
+# Change the working directory to the project root (two levels up from this
+# file) so that the relative config paths below resolve from any test runner
+os.chdir(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+print('Current working directory set to ' + os.getcwd())
+
TestInternalConf = InternalConfig(
'test/test_internal_config.ini')
TestUserConf = UserConfig(
diff --git a/test/alerting/periodic/test_periodic.py b/test/alerting/periodic/test_periodic.py
new file mode 100644
index 0000000..a9da4f5
--- /dev/null
+++ b/test/alerting/periodic/test_periodic.py
@@ -0,0 +1,86 @@
+import logging
+import time
+import unittest
+from datetime import datetime, timedelta
+
+from redis import ConnectionError as RedisConnectionError
+
+from src.alerting.channels.channel import ChannelSet
+from src.alerting.periodic.periodic import send_alive_alert
+from src.utils.redis_api import RedisApi
+from src.utils.timing import TimedTaskLimiter
+from test import TestInternalConf, TestUserConf
+from test.node.test_node import CounterChannel
+
+
+class TestPeriodic(unittest.TestCase):
+ def setUp(self) -> None:
+ self.alerter_name = 'testalerter'
+ self.logger = logging.getLogger('dummy')
+ self.counter_channel = CounterChannel(self.logger)
+ self.channel_set = ChannelSet([self.counter_channel])
+
+ self.db = TestInternalConf.redis_test_database
+ self.host = TestUserConf.redis_host
+ self.port = TestUserConf.redis_port
+ self.password = TestUserConf.redis_password
+ self.redis = RedisApi(self.logger, self.db, self.host,
+ self.port, self.password)
+ self.redis.delete_all_unsafe()
+
+ try:
+ self.redis.ping_unsafe()
+ except RedisConnectionError:
+ self.fail('Redis is not online.')
+
+ self.timedelta = TestUserConf.interval_seconds
+ self.timing = TimedTaskLimiter(self.timedelta)
+ self.mute_key = TestInternalConf.redis_periodic_alive_reminder_mute_key
+
+ def test_periodic_alive_reminder_can_do_task_no_mute_key(self):
+ self.timing.did_task()
+ time.sleep(TestUserConf.interval_seconds.seconds)
+ self.counter_channel.reset() # ignore previous alerts
+ send_alive_alert(self.timing, self.redis, self.mute_key,
+ self.channel_set)
+ self.assertEqual(self.counter_channel.minor_count, 0)
+ self.assertEqual(self.counter_channel.major_count, 0)
+ self.assertEqual(self.counter_channel.info_count, 1)
+ self.assertEqual(self.counter_channel.error_count, 0)
+
+ def test_periodic_alive_reminder_cannot_do_task_no_mute_key(self):
+ self.timing.did_task()
+ time.sleep(TestUserConf.interval_seconds.seconds - 2)
+ self.counter_channel.reset() # ignore previous alerts
+ send_alive_alert(self.timing, self.redis, self.mute_key,
+ self.channel_set)
+ self.assertEqual(self.counter_channel.minor_count, 0)
+ self.assertEqual(self.counter_channel.major_count, 0)
+ self.assertEqual(self.counter_channel.info_count, 0)
+ self.assertEqual(self.counter_channel.error_count, 0)
+
+ def test_periodic_alive_reminder_can_do_task_mute_key_present(self):
+ self.timing.did_task()
+ time.sleep(TestUserConf.interval_seconds.seconds)
+ hours = timedelta(hours=float(1))
+ until = str(datetime.now() + hours)
+ self.redis.set_for(self.mute_key, until, hours)
+ self.counter_channel.reset() # ignore previous alerts
+ send_alive_alert(self.timing, self.redis, self.mute_key,
+ self.channel_set)
+ self.redis.remove(self.mute_key)
+ self.assertEqual(self.counter_channel.minor_count, 0)
+ self.assertEqual(self.counter_channel.major_count, 0)
+ self.assertEqual(self.counter_channel.info_count, 0)
+ self.assertEqual(self.counter_channel.error_count, 0)
+
+ def test_periodic_alive_reminder_cannot_do_task_mute_key_present(self):
+ self.timing.did_task()
+ time.sleep(TestUserConf.interval_seconds.seconds - 3)
+ hours = timedelta(hours=float(1))
+ until = str(datetime.now() + hours)
+ self.redis.set_for(self.mute_key, until, hours)
+ self.counter_channel.reset() # ignore previous alerts
+ send_alive_alert(self.timing, self.redis, self.mute_key,
+ self.channel_set)
+ self.redis.remove(self.mute_key)
+ self.assertEqual(self.counter_channel.minor_count, 0)
+ self.assertEqual(self.counter_channel.major_count, 0)
+ self.assertEqual(self.counter_channel.info_count, 0)
+ self.assertEqual(self.counter_channel.error_count, 0)
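The four tests above pin down send_alive_alert's contract: exactly one info alert when the interval has elapsed and no mute key is set in Redis, and silence otherwise. A sketch consistent with those tests; the RedisApi accessor and the alert's class and wording are assumptions, not taken from this patch:

    from src.alerting.alerts.alerts import Alert
    from src.alerting.channels.channel import ChannelSet
    from src.utils.redis_api import RedisApi
    from src.utils.timing import TimedTaskLimiter

    def send_alive_alert(timing: TimedTaskLimiter, redis: RedisApi,
                         mute_key: str, channels: ChannelSet) -> None:
        # Only remind once the configured interval has elapsed
        if not timing.can_do_task():
            return
        # Stay silent while the mute key is set (see /mute and /unmute)
        if redis.get(mute_key) is None:  # accessor name is an assumption
            channels.alert_info(Alert('Alerter is still running.'))  # wording assumed
        timing.did_task()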
diff --git a/test/node/test_node.py b/test/node/test_node.py
index fd54283..ae8e752 100644
--- a/test/node/test_node.py
+++ b/test/node/test_node.py
@@ -6,8 +6,8 @@
import dateutil
from redis import ConnectionError as RedisConnectionError
-from src.alerting.channels.channel import ChannelSet, Channel
from src.alerting.alerts.alerts import Alert
+from src.alerting.channels.channel import ChannelSet, Channel
from src.node.node import Node, NodeType
from src.utils.redis_api import RedisApi
from test import TestInternalConf, TestUserConf
@@ -99,6 +99,13 @@ def setUp(self) -> None:
self.peers_more_than_validator_danger_boundary = \
self.peers_validator_danger_boundary + 2
+ self.peers_validator_safe_boundary = \
+ TestInternalConf.validator_peer_safe_boundary
+ self.peers_less_than_validator_safe_boundary = \
+ self.peers_validator_safe_boundary - 2
+ self.peers_more_than_validator_safe_boundary = \
+ self.peers_validator_safe_boundary + 2
+
self.peers_full_node_danger_boundary = \
TestInternalConf.full_node_peer_danger_boundary
self.peers_less_than_full_node_danger_boundary = \
@@ -140,65 +147,65 @@ def test_status_returns_three_values(self):
'number_of_peers=999')
def test_first_set_as_down_sends_info_alert_and_sets_node_to_down(self):
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
self.assertTrue(self.validator.is_down)
def test_second_set_as_down_sends_major_alert_if_validator(self):
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.assertEqual(self.counter_channel.major_count, 1)
self.assertTrue(self.validator.is_down)
def test_second_set_as_down_sends_minor_alert_if_non_validator(self):
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
self.assertEqual(self.counter_channel.minor_count, 1)
self.assertTrue(self.full_node.is_down)
def test_third_set_as_down_does_nothing_if_within_time_interval_for_validator(
self):
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.assertTrue(self.counter_channel.no_alerts())
self.assertTrue(self.validator.is_down)
def test_third_set_as_down_does_nothing_if_within_time_interval_for_non_validator(
self):
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
self.assertTrue(self.counter_channel.no_alerts())
self.assertTrue(self.full_node.is_down)
def test_third_set_as_down_sends_major_alert_if_after_time_interval_for_validator(
self):
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
sleep(self.downtime_alert_time_interval_with_error_margin.seconds)
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.assertEqual(self.counter_channel.major_count, 1)
self.assertTrue(self.validator.is_down)
def test_third_set_as_down_sends_minor_alert_if_after_time_interval_for_non_validator(
self):
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
sleep(self.downtime_alert_time_interval_with_error_margin.seconds)
- self.full_node.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.full_node.set_as_down(self.channel_set, self.logger)
self.assertEqual(self.counter_channel.minor_count, 1)
self.assertTrue(self.full_node.is_down)
@@ -210,7 +217,7 @@ def test_set_as_up_does_nothing_if_not_down(self):
def test_set_as_up_sets_as_up_but_no_alerts_if_set_as_down_called_only_once(
self):
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
self.validator.set_as_up(self.channel_set, self.logger)
@@ -219,8 +226,8 @@ def test_set_as_up_sets_as_up_but_no_alerts_if_set_as_down_called_only_once(
def test_set_as_up_sets_as_up_and_sends_info_alert_if_set_as_down_called_twice(
self):
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
self.validator.set_as_up(self.channel_set, self.logger)
@@ -228,14 +235,14 @@ def test_set_as_up_sets_as_up_and_sends_info_alert_if_set_as_down_called_twice(
self.assertFalse(self.validator.is_down)
def test_set_as_up_resets_alert_time_interval(self):
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.validator.set_as_up(self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
- self.validator.set_as_down(self.channel_set, self.dummy_exception, self.logger)
+ self.validator.set_as_down(self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
self.assertTrue(self.validator.is_down)
@@ -388,8 +395,10 @@ def test_set_voting_power_raises_no_alerts_first_time_round(self):
self.assertTrue(self.counter_channel.no_alerts())
def test_set_voting_power_raises_no_alerts_if_voting_power_the_same(self):
- self.validator.set_voting_power(self.dummy_voting_power, self.channel_set, self.logger)
- self.validator.set_voting_power(self.dummy_voting_power, self.channel_set, self.logger)
+ self.validator.set_voting_power(self.dummy_voting_power,
+ self.channel_set, self.logger)
+ self.validator.set_voting_power(self.dummy_voting_power,
+ self.channel_set, self.logger)
self.assertTrue(self.counter_channel.no_alerts())
@@ -397,8 +406,10 @@ def test_set_voting_power_raises_info_alert_if_voting_power_increases_from_non_0
self):
increased_voting_power = self.dummy_voting_power + 1
- self.validator.set_voting_power(self.dummy_voting_power, self.channel_set, self.logger)
- self.validator.set_voting_power(increased_voting_power, self.channel_set, self.logger)
+ self.validator.set_voting_power(self.dummy_voting_power,
+ self.channel_set, self.logger)
+ self.validator.set_voting_power(increased_voting_power,
+ self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
@@ -407,7 +418,8 @@ def test_set_voting_power_raises_info_alert_if_voting_power_increases_from_0(
# This is just to cover the unique message when power increases from 0
self.validator.set_voting_power(0, self.channel_set, self.logger)
- self.validator.set_voting_power(self.dummy_voting_power, self.channel_set, self.logger)
+ self.validator.set_voting_power(self.dummy_voting_power,
+ self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
@@ -415,14 +427,17 @@ def test_set_voting_power_raises_info_alert_if_voting_power_decreases_to_non_0(
self):
decreased_voting_power = self.dummy_voting_power - 1
- self.validator.set_voting_power(self.dummy_voting_power, self.channel_set, self.logger)
- self.validator.set_voting_power(decreased_voting_power, self.channel_set, self.logger)
+ self.validator.set_voting_power(self.dummy_voting_power,
+ self.channel_set, self.logger)
+ self.validator.set_voting_power(decreased_voting_power,
+ self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
def test_set_voting_power_raises_major_alert_if_voting_power_decreases_to_0(
self):
- self.validator.set_voting_power(self.dummy_voting_power, self.channel_set, self.logger)
+ self.validator.set_voting_power(self.dummy_voting_power,
+ self.channel_set, self.logger)
self.validator.set_voting_power(0, self.channel_set, self.logger)
self.assertEqual(self.counter_channel.major_count, 1)
@@ -467,24 +482,32 @@ def test_set_catching_up_raises_info_alert_if_from_true_to_false(self):
def test_set_no_of_peers_raises_no_alerts_first_time_round_for_validator(
self):
- self.validator.set_no_of_peers(self.dummy_no_of_peers, self.channel_set, self.logger)
+ self.validator.set_no_of_peers(self.dummy_no_of_peers, self.channel_set,
+ self.logger)
self.assertTrue(self.counter_channel.no_alerts())
def test_set_no_of_peers_raises_no_alerts_first_time_round_for_full_node(
self):
- self.full_node.set_no_of_peers(self.dummy_no_of_peers, self.channel_set, self.logger)
+ self.full_node.set_no_of_peers(self.dummy_no_of_peers, self.channel_set,
+ self.logger)
self.assertTrue(self.counter_channel.no_alerts())
- def test_set_no_of_peers_raises_info_alert_if_increase_for_validator(self):
+ def test_set_no_of_peers_raises_no_alerts_if_increase_for_validator_if_outside_safe_range(
+ self):
increased_no_of_peers = self.dummy_no_of_peers + 1
- self.validator.set_no_of_peers(self.dummy_no_of_peers, self.channel_set, self.logger)
+ self.validator.set_no_of_peers(self.dummy_no_of_peers, self.channel_set,
+ self.logger)
self.counter_channel.reset() # ignore previous alerts
- self.validator.set_no_of_peers(increased_no_of_peers, self.channel_set, self.logger)
+ self.validator.set_no_of_peers(increased_no_of_peers, self.channel_set,
+ self.logger)
- self.assertEqual(self.counter_channel.info_count, 1)
+ self.assertEqual(self.counter_channel.minor_count, 0)
+ self.assertEqual(self.counter_channel.major_count, 0)
+ self.assertEqual(self.counter_channel.info_count, 0)
+ self.assertEqual(self.counter_channel.error_count, 0)
def test_set_no_of_peers_raises_info_alert_if_increase_for_full_node_if_inside_danger(
self):
@@ -517,7 +540,7 @@ def test_set_no_of_peers_raises_info_alert_if_increase_for_full_node_if_inside_t
self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
self.full_node.set_no_of_peers(
- self.peers_full_node_danger_boundary,
+ self.peers_more_than_full_node_danger_boundary,
self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
@@ -525,27 +548,39 @@ def test_set_no_of_peers_raises_info_alert_if_increase_for_full_node_if_inside_t
def test_set_no_of_peers_raises_info_alert_if_increase_for_validator_if_inside_danger(
self):
self.validator.set_no_of_peers(
- self.peers_less_than_full_node_danger_boundary,
+ self.peers_less_than_validator_danger_boundary,
self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
self.validator.set_no_of_peers(
- self.peers_less_than_full_node_danger_boundary + 1,
+ self.peers_less_than_validator_danger_boundary + 1,
self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
- def test_set_no_of_peers_raises_info_alert_if_increase_for_validator_if_outside_danger(
+ def test_set_no_of_peers_raises_info_alert_if_increase_for_validator_if_outside_danger_inside_safe(
self):
self.validator.set_no_of_peers(
- self.peers_more_than_full_node_danger_boundary,
+ self.peers_validator_danger_boundary,
self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
self.validator.set_no_of_peers(
- self.peers_more_than_full_node_danger_boundary + 1,
+ self.peers_validator_danger_boundary + 1,
self.channel_set, self.logger)
self.assertEqual(self.counter_channel.info_count, 1)
+ def test_set_no_of_peers_raises_info_alert_if_decrease_for_validator_if_outside_danger_inside_safe(
+ self):
+ self.validator.set_no_of_peers(
+ self.peers_validator_safe_boundary,
+ self.channel_set, self.logger)
+ self.counter_channel.reset() # ignore previous alerts
+ self.validator.set_no_of_peers(
+ self.peers_validator_safe_boundary - 1,
+ self.channel_set, self.logger)
+
+ self.assertEqual(self.counter_channel.minor_count, 1)
+
def test_set_no_of_peers_raises_minor_alert_if_decrease_for_full_node_if_inside_danger(
self):
self.full_node.set_no_of_peers(
@@ -573,7 +608,7 @@ def test_set_no_of_peers_raises_no_alerts_if_decrease_for_full_node_if_outside_d
def test_set_no_of_peers_raises_major_alert_if_decrease_for_validator_if_inside_danger(
self):
self.validator.set_no_of_peers(
- self.peers_more_than_validator_danger_boundary,
+ self.peers_validator_danger_boundary,
self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
self.validator.set_no_of_peers(
@@ -582,18 +617,48 @@ def test_set_no_of_peers_raises_major_alert_if_decrease_for_validator_if_inside_
self.assertEqual(self.counter_channel.major_count, 1)
- def test_set_no_of_peers_raises_minor_alert_if_decrease_for_validator_if_outside_danger(
+ def test_set_no_of_peers_raises_minor_alert_if_decrease_for_validator_if_outside_danger_inside_safe(
self):
self.validator.set_no_of_peers(
- self.peers_more_than_validator_danger_boundary,
+ self.peers_validator_safe_boundary,
self.channel_set, self.logger)
self.counter_channel.reset() # ignore previous alerts
self.validator.set_no_of_peers(
- self.peers_more_than_validator_danger_boundary - 1,
+ self.peers_validator_safe_boundary - 1,
self.channel_set, self.logger)
self.assertEqual(self.counter_channel.minor_count, 1)
+ def test_set_no_of_peers_raises_no_alerts_if_decrease_for_validator_if_outside_safe(
+ self):
+ self.validator.set_no_of_peers(
+ self.peers_more_than_validator_safe_boundary,
+ self.channel_set, self.logger)
+ self.counter_channel.reset() # ignore previous alerts
+ self.validator.set_no_of_peers(
+ self.peers_more_than_validator_safe_boundary - 1,
+ self.channel_set, self.logger)
+
+ self.assertEqual(self.counter_channel.minor_count, 0)
+ self.assertEqual(self.counter_channel.major_count, 0)
+ self.assertEqual(self.counter_channel.info_count, 0)
+ self.assertEqual(self.counter_channel.error_count, 0)
+
+ def test_set_no_of_peers_raises_info_alert_if_increase_for_validator_outside_safe_for_first_time(
+ self):
+ self.validator.set_no_of_peers(
+ self.peers_less_than_validator_safe_boundary,
+ self.channel_set, self.logger)
+ self.counter_channel.reset() # ignore previous alerts
+ self.validator.set_no_of_peers(
+ self.peers_more_than_validator_safe_boundary,
+ self.channel_set, self.logger)
+
+ self.assertEqual(self.counter_channel.minor_count, 0)
+ self.assertEqual(self.counter_channel.major_count, 0)
+ self.assertEqual(self.counter_channel.info_count, 1)
+ self.assertEqual(self.counter_channel.error_count, 0)
+
class TestNodeWithRedis(unittest.TestCase):
@@ -615,6 +680,8 @@ def setUpClass(cls) -> None:
def setUp(self) -> None:
self.node_name = 'testnode'
+ self.network_name = 'testnetwork'
+ self.redis_prefix = self.node_name + "@" + self.network_name
self.date = datetime.min + timedelta(days=123)
self.logger = logging.getLogger('dummy')
@@ -633,11 +700,13 @@ def setUp(self) -> None:
self.non_validator = Node(name=self.node_name, rpc_url=None,
node_type=NodeType.NON_VALIDATOR_FULL_NODE,
- pubkey=None, network='', redis=self.redis)
+ pubkey=None, network=self.network_name,
+ redis=self.redis)
self.validator = Node(name=self.node_name, rpc_url=None,
node_type=NodeType.VALIDATOR_FULL_NODE,
- pubkey=None, network='', redis=self.redis)
+ pubkey=None, network=self.network_name,
+ redis=self.redis)
def test_load_state_changes_nothing_if_nothing_saved(self):
self.validator.load_state(self.logger)
@@ -651,12 +720,13 @@ def test_load_state_changes_nothing_if_nothing_saved(self):
def test_load_state_sets_values_to_saved_values(self):
# Set Redis values manually
- self.redis.set_unsafe(self.node_name + '_went_down_at', str(self.date))
- self.redis.set_unsafe(self.node_name + '_consecutive_blocks_missed',
+ self.redis.set_unsafe(self.redis_prefix + '_went_down_at',
+ str(self.date))
+ self.redis.set_unsafe(self.redis_prefix + '_consecutive_blocks_missed',
123)
- self.redis.set_unsafe(self.node_name + '_voting_power', 456)
- self.redis.set_unsafe(self.node_name + '_catching_up', str(True))
- self.redis.set_unsafe(self.node_name + '_no_of_peers', 789)
+ self.redis.set_unsafe(self.redis_prefix + '_voting_power', 456)
+ self.redis.set_unsafe(self.redis_prefix + '_catching_up', str(True))
+ self.redis.set_unsafe(self.redis_prefix + '_no_of_peers', 789)
# Load the Redis values
self.validator.load_state(self.logger)
@@ -670,7 +740,7 @@ def test_load_state_sets_values_to_saved_values(self):
def test_load_state_sets_went_down_at_to_none_if_incorrect_type(self):
# Set Redis values manually
- self.redis.set_unsafe(self.node_name + '_went_down_at', str(True))
+ self.redis.set_unsafe(self.redis_prefix + '_went_down_at', str(True))
# Load the Redis values
self.validator.load_state(self.logger)
@@ -692,13 +762,13 @@ def test_save_state_sets_values_to_current_values(self):
# Assert
self.assertEqual(
dateutil.parser.parse(self.redis.get_unsafe(
- self.node_name + '_went_down_at')), self.date)
+ self.redis_prefix + '_went_down_at')), self.date)
self.assertEqual(
self.redis.get_int_unsafe(
- self.node_name + '_consecutive_blocks_missed'), 123)
+ self.redis_prefix + '_consecutive_blocks_missed'), 123)
self.assertEqual(
- self.redis.get_int_unsafe(self.node_name + '_voting_power'), 456)
+ self.redis.get_int_unsafe(self.redis_prefix + '_voting_power'), 456)
self.assertTrue(
- self.redis.get_bool_unsafe(self.node_name + '_catching_up'))
+ self.redis.get_bool_unsafe(self.redis_prefix + '_catching_up'))
self.assertEqual(
- self.redis.get_int_unsafe(self.node_name + '_no_of_peers'), 789)
+ self.redis.get_int_unsafe(self.redis_prefix + '_no_of_peers'), 789)
diff --git a/test/test_internal_config.ini b/test/test_internal_config.ini
index b4312ac..54aa1b6 100644
--- a/test/test_internal_config.ini
+++ b/test/test_internal_config.ini
@@ -18,13 +18,14 @@ twiml_instructions_url = https://twimlets.com/echo
[redis]
redis_database = 10
-redis_rest_database = 11
+redis_test_database = 11
redis_twilio_snooze_key = twilio_snooze
redis_github_releases_key_prefix = github_releases_
redis_node_monitor_alive_key_prefix = node_monitor_alive_
redis_network_monitor_alive_key_prefix = network_monitor_alive_
redis_network_monitor_last_height_key_prefix = network_monitor_last_height_checked_
+redis_periodic_alive_reminder_mute_key = alive_reminder_mute
redis_node_monitor_alive_key_timeout = 86400
redis_network_monitor_alive_key_timeout = 86400
@@ -34,6 +35,7 @@ redis_network_monitor_alive_key_timeout = 86400
[monitoring_periods]
node_monitor_period_seconds = 10
network_monitor_period_seconds = 10
+network_monitor_max_catch_up_blocks = 500
github_monitor_period_seconds = 300
# These define how often a monitor runs an iteration of its monitoring loop
@@ -42,7 +44,8 @@ downtime_alert_interval_seconds = 3
max_missed_blocks_interval_seconds = 4
max_missed_blocks_in_time_interval = 10
validator_peer_danger_boundary = 1
-full_node_peer_danger_boundary = 15
+validator_peer_safe_boundary = 3
+full_node_peer_danger_boundary = 10
missed_blocks_danger_boundary = 5
github_error_interval_seconds = 3600
# These limit the number of alerts of a specific type received using either
@@ -70,4 +73,4 @@ tx_mintscan_link_prefix = https://www.mintscan.io/txs/
github_releases_template = https://api.github.com/repos/{}releases
# This is a Python template string, where {} is replaced with (for example) cosmos/cosmos-sdk/
-# so that the complete link becomes: https://api.github.com/repos/cosmos/cosmos-sdk/releases
\ No newline at end of file
+# so that the complete link becomes: https://api.github.com/repos/cosmos/cosmos-sdk/releases
diff --git a/test/test_user_config_main.ini b/test/test_user_config_main.ini
index 3a8a6a5..9624b16 100644
--- a/test/test_user_config_main.ini
+++ b/test/test_user_config_main.ini
@@ -32,3 +32,8 @@ host = localhost
port = 6379
password =
+[periodic_alive_reminder]
+enabled = False
+interval_seconds = 10
+email_enabled = False
+telegram_enabled = True
diff --git a/test/test_user_config_nodes.ini b/test/test_user_config_nodes.ini
index e0b5f3f..acb82c7 100644
--- a/test/test_user_config_nodes.ini
+++ b/test/test_user_config_nodes.ini
@@ -24,4 +24,4 @@ node_name = Sentry 2
node_rpc_url = http://11.22.33.44:26657
node_is_validator = false
include_in_node_monitor = false
-include_in_network_monitor = false
\ No newline at end of file
+include_in_network_monitor = false
diff --git a/test/test_user_config_repos.ini b/test/test_user_config_repos.ini
index 25a664f..3a2ab27 100644
--- a/test/test_user_config_repos.ini
+++ b/test/test_user_config_repos.ini
@@ -6,4 +6,4 @@ include_in_github_monitor = true
[repo_2]
repo_name = Gaia
repo_page = cosmos/gaia/
-include_in_github_monitor = false
\ No newline at end of file
+include_in_github_monitor = false
diff --git a/test/utils/test_redis_api.py b/test/utils/test_redis_api.py
index 57f4983..53aa4d5 100644
--- a/test/utils/test_redis_api.py
+++ b/test/utils/test_redis_api.py
@@ -5,7 +5,7 @@
from time import sleep
from redis import ConnectionError as RedisConnectionError, DataError, \
- ResponseError
+ AuthenticationError
from src.utils.redis_api import RedisApi
from test import TestInternalConf, TestUserConf
@@ -80,8 +80,8 @@ def test_set_unsafe_throws_exception_if_incorrect_password(self):
self.redis.set_unsafe(self.key1, self.val1) # works
try:
redis_bad_pass.set_unsafe(self.key1, self.val1)
- self.fail('Expected ResponseError to be thrown')
- except ResponseError:
+ self.fail('Expected AuthenticationError to be thrown')
+ except AuthenticationError:
pass
def test_set_unsafe_sets_the_specified_key_to_the_specified_value(self):
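The import change above tracks redis-py, which raises the more specific AuthenticationError (rather than a generic ResponseError) when AUTH fails. A short sketch, assuming a server that requires a password; connection details are placeholders:

    import logging
    from redis import AuthenticationError, StrictRedis

    client = StrictRedis(host='localhost', port=6379,
                         password='wrong-password')
    try:
        client.set('some_key', 'some_value')
    except AuthenticationError as e:
        logging.error('Redis authentication failed: %s', e)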