From bf0fc254cd5ff26648e4176984164e45d0b532f0 Mon Sep 17 00:00:00 2001 From: Matthew Tovbin Date: Wed, 16 Mar 2022 11:33:58 -0700 Subject: [PATCH 1/3] Added HealthyHostCount alarm --- README.md | 35 +++++++++++++++++++---------------- main.tf | 20 ++++++++++++++++++++ variables.tf | 6 ++++++ 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 3d4874b..f8e5ea6 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,14 @@ This Terraform module manages Cloudwatch Alarms for an ALB in the region. It does NOT create or manage Load Balancers, only Metric Alarms. **Requires**: + - AWS Provider - Terraform 0.12 ## Alarms Created Alarms Always Created: + - Any 5xx errors from the target group - Any 5xx errors from the load balancer - Unacceptably high average response times @@ -30,22 +32,23 @@ module "aws-alb-alarms" { ## Variables -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:-----:| -| actions\_alarm | A list of actions to take when alarms are triggered. Will likely be an SNS topic for event distribution. | `list` | `[]` | no | -| actions\_ok | A list of actions to take when alarms are cleared. Will likely be an SNS topic for event distribution. | `list` | `[]` | no | -| evaluation\_period | The evaluation period over which to use when triggering alarms. | `string` | `"5"` | no | -| load\_balancer\_id | ALB ID | `string` | n/a | yes | -| prefix | Alarm Name Prefix | `string` | `""` | no | -| response\_time\_threshold | The average number of milliseconds that requests should complete within. | `string` | `"50"` | no | -| unhealthy\_hosts\_threshold | The number of unhealthy hosts. | `string` | `"0"` | no | -| statistic\_period | The number of seconds that make each statistic period. 
| `string` | `"60"` | no | -| target\_group\_id | Target Group ID | `string` | n/a | yes | +| Name | Description | Type | Default | Required | +| ------------------------- | -------------------------------------------------------------------------------------------------------- | -------- | ------- | :------: | +| actions_alarm | A list of actions to take when alarms are triggered. Will likely be an SNS topic for event distribution. | `list` | `[]` | no | +| actions_ok | A list of actions to take when alarms are cleared. Will likely be an SNS topic for event distribution. | `list` | `[]` | no | +| evaluation_period | The evaluation period over which to use when triggering alarms. | `string` | `"5"` | no | +| load_balancer_id | ALB ID | `string` | n/a | yes | +| prefix | Alarm Name Prefix | `string` | `""` | no | +| response_time_threshold | The average number of milliseconds that requests should complete within. | `string` | `"50"` | no | +| unhealthy_hosts_threshold | The number of unhealthy hosts. | `string` | `"0"` | no | +| healthy_hosts_threshold | The number of healthy hosts. | `string` | `"0"` | no | +| statistic_period | The number of seconds that make each statistic period. 
| `string` | `"60"` | no | +| target_group_id | Target Group ID | `string` | n/a | yes | ## Outputs -| Name | Description | -|------|-------------| -| alarm\_httpcode\_lb\_5xx\_count | The CloudWatch Metric Alarm resource block for 5xx errors on the load balancer | -| alarm\_httpcode\_target\_5xx\_counts | The CloudWatch Metric Alarm resource block for 5xx errors on the target group | -| alarm\_target\_response\_time\_average | The CloudWatch Metric Alarm resource block for unacceptably high response time averages | +| Name | Description | +| ---------------------------------- | --------------------------------------------------------------------------------------- | +| alarm_httpcode_lb_5xx_count | The CloudWatch Metric Alarm resource block for 5xx errors on the load balancer | +| alarm_httpcode_target_5xx_counts | The CloudWatch Metric Alarm resource block for 5xx errors on the target group | +| alarm_target_response_time_average | The CloudWatch Metric Alarm resource block for unacceptably high response time averages | diff --git a/main.tf b/main.tf index 8ebe3b8..ed4758a 100644 --- a/main.tf +++ b/main.tf @@ -72,3 +72,23 @@ resource "aws_cloudwatch_metric_alarm" "unhealthy_hosts" { "LoadBalancer" = var.load_balancer_id } } + + +resource "aws_cloudwatch_metric_alarm" "healthy_hosts" { + alarm_name = "${var.prefix}alb-tg-${var.target_group_id}-healthy-hosts" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.evaluation_period + metric_name = "HealthyHostCount" + namespace = "AWS/ApplicationELB" + period = var.statistic_period + statistic = "Minimum" + threshold = var.healthy_hosts_threshold + alarm_description = format("Healthy host count is greater than %s", var.healthy_hosts_threshold) + alarm_actions = var.actions_alarm + ok_actions = var.actions_ok + + dimensions = { + "TargetGroup" = var.target_group_id + "LoadBalancer" = var.load_balancer_id + } +} diff --git a/variables.tf b/variables.tf index f6b96e9..b843a33 100644 --- 
a/variables.tf +++ b/variables.tf @@ -26,6 +26,12 @@ variable "unhealthy_hosts_threshold" { description = "The number of unhealthy hosts." } +variable "healthy_hosts_threshold" { + type = string + default = "0" + description = "The number of healthy hosts." +} + variable "evaluation_period" { type = string default = "5" From 350e3130fb9a45b23e26d5be558542e2ed454af1 Mon Sep 17 00:00:00 2001 From: Matthew Tovbin Date: Wed, 16 Mar 2022 11:48:16 -0700 Subject: [PATCH 2/3] Document healthy hosts alarm and cost in README; fix CloudWatch capitalization --- README.md | 9 +++++---- main.tf | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f8e5ea6..93821e8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Terraform Module for AWS ALB Cloudwatch Alarms +# Terraform Module for AWS ALB CloudWatch Alarms -This Terraform module manages Cloudwatch Alarms for an ALB in the region. It does NOT create or manage Load Balancers, only Metric Alarms. +This Terraform module manages CloudWatch Alarms for an ALB in the region. It does NOT create or manage Load Balancers, only Metric Alarms. 
**Requires**: @@ -15,10 +15,11 @@ Alarms Always Created: - Any 5xx errors from the load balancer - Unacceptably high average response times - Number of unhealthy hosts +- Number of healthy hosts -**Estimated Operating Cost**: $ 0.40 / month +**Estimated Operating Cost**: $ 0.50 / month -- $ 0.10 / month for Metric Alarms (4x) +- $ 0.10 / month for Metric Alarms (5x) ## Example diff --git a/main.tf b/main.tf index ed4758a..dbd2909 100644 --- a/main.tf +++ b/main.tf @@ -73,7 +73,6 @@ resource "aws_cloudwatch_metric_alarm" "unhealthy_hosts" { } } - resource "aws_cloudwatch_metric_alarm" "healthy_hosts" { alarm_name = "${var.prefix}alb-tg-${var.target_group_id}-healthy-hosts" comparison_operator = "GreaterThanThreshold" From 20c85bd9a649d9f21a1637eedbca32821b1bdac4 Mon Sep 17 00:00:00 2001 From: Matthew Tovbin Date: Wed, 16 Mar 2022 11:56:16 -0700 Subject: [PATCH 3/3] fix the condition --- main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.tf b/main.tf index dbd2909..51e2187 100644 --- a/main.tf +++ b/main.tf @@ -75,14 +75,14 @@ resource "aws_cloudwatch_metric_alarm" "unhealthy_hosts" { resource "aws_cloudwatch_metric_alarm" "healthy_hosts" { alarm_name = "${var.prefix}alb-tg-${var.target_group_id}-healthy-hosts" - comparison_operator = "GreaterThanThreshold" + comparison_operator = "LessThanOrEqualToThreshold" evaluation_periods = var.evaluation_period metric_name = "HealthyHostCount" namespace = "AWS/ApplicationELB" period = var.statistic_period statistic = "Minimum" threshold = var.healthy_hosts_threshold - alarm_description = format("Healthy host count is greater than %s", var.healthy_hosts_threshold) + alarm_description = format("Healthy host count is less than or equal to %s", var.healthy_hosts_threshold) alarm_actions = var.actions_alarm ok_actions = var.actions_ok