Skip to content

Commit

Permalink
Merge pull request #4 from faros-ai/master
Browse files Browse the repository at this point in the history
Added UnHealthyHostCount metric + cleanup warnings
  • Loading branch information
lorenzoaiello authored Feb 1, 2022
2 parents 3bdbbc0 + fd10f77 commit 4920339
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 11 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ Alarms Always Created:
- Any 5xx errors from the target group
- Any 5xx errors from the load balancer
- Unacceptably high average response times
- Number of unhealthy hosts

**Estimated Operating Cost**: $ 0.30 / month
**Estimated Operating Cost**: $ 0.40 / month

- $ 0.10 / month for Metric Alarms (3x)
- $ 0.10 / month for Metric Alarms (4x)

## Example

Expand All @@ -37,6 +38,7 @@ module "aws-alb-alarms" {
| load\_balancer\_id | ALB ID | `string` | n/a | yes |
| prefix | Alarm Name Prefix | `string` | `""` | no |
| response\_time\_threshold | The average number of milliseconds that requests should complete within. | `string` | `"50"` | no |
| unhealthy\_hosts\_threshold | The number of unhealthy hosts above which the alarm triggers. | `string` | `"0"` | no |
| statistic\_period | The number of seconds that make each statistic period. | `string` | `"60"` | no |
| target\_group\_id | Target Group ID | `string` | n/a | yes |

Expand Down
21 changes: 20 additions & 1 deletion main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,26 @@ resource "aws_cloudwatch_metric_alarm" "target_response_time_average" {
period = var.statistic_period
statistic = "Average"
threshold = var.response_time_threshold
alarm_description = "Average API response time is too high"
alarm_description = format("Average API response time is greater than %s", var.response_time_threshold)
alarm_actions = var.actions_alarm
ok_actions = var.actions_ok

dimensions = {
"TargetGroup" = var.target_group_id
"LoadBalancer" = var.load_balancer_id
}
}

resource "aws_cloudwatch_metric_alarm" "unhealthy_hosts" {
alarm_name = "${var.prefix}alb-tg-${var.target_group_id}-unhealthy-hosts"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = var.evaluation_period
metric_name = "UnHealthyHostCount"
namespace = "AWS/ApplicationELB"
period = var.statistic_period
statistic = "Minimum"
threshold = var.unhealthy_hosts_threshold
alarm_description = format("Unhealthy host count is greater than %s", var.unhealthy_hosts_threshold)
alarm_actions = var.actions_alarm
ok_actions = var.actions_ok

Expand Down
22 changes: 14 additions & 8 deletions variables.tf
Original file line number Diff line number Diff line change
@@ -1,45 +1,51 @@
variable "load_balancer_id" {
type = "string"
type = string
description = "ALB ID"
}

variable "target_group_id" {
type = "string"
type = string
description = "Target Group ID"
}

variable "prefix" {
type = "string"
type = string
default = ""
description = "Alarm Name Prefix"
}

variable "response_time_threshold" {
type = "string"
type = string
default = "50"
description = "The average number of milliseconds that requests should complete within."
}

variable "unhealthy_hosts_threshold" {
type = string
default = "0"
description = "The number of unhealthy hosts above which the alarm triggers."
}

variable "evaluation_period" {
type = "string"
type = string
default = "5"
description = "The number of consecutive statistic periods evaluated when triggering alarms."
}

variable "statistic_period" {
type = "string"
type = string
default = "60"
description = "The number of seconds that make each statistic period."
}

variable "actions_alarm" {
type = "list"
type = list(string)
default = []
description = "A list of actions to take when alarms are triggered. Will likely be an SNS topic for event distribution."
}

variable "actions_ok" {
type = "list"
type = list(string)
default = []
description = "A list of actions to take when alarms are cleared. Will likely be an SNS topic for event distribution."
}

0 comments on commit 4920339

Please sign in to comment.