From a76f31e313ebd96721c230bec3a03d0de6d3cd0c Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Sun, 17 Dec 2023 11:38:24 +0000 Subject: [PATCH] Add metrics registry to kubetest2 --- go.work.sum | 1 + kubetest2/go.mod | 10 +- kubetest2/go.sum | 12 +++ .../internal/deployers/eksapi/deployer.go | 33 ++++++- kubetest2/internal/deployers/eksapi/infra.go | 27 +++++- .../internal/deployers/eksapi/janitor.go | 14 ++- kubetest2/internal/metrics/cloudwatch.go | 93 +++++++++++++++++++ kubetest2/internal/metrics/noop.go | 13 +++ kubetest2/internal/metrics/registry.go | 18 ++++ kubetest2/kubetest2-eksapi-janitor/main.go | 4 +- 10 files changed, 211 insertions(+), 14 deletions(-) create mode 100644 kubetest2/internal/metrics/cloudwatch.go create mode 100644 kubetest2/internal/metrics/noop.go create mode 100644 kubetest2/internal/metrics/registry.go diff --git a/go.work.sum b/go.work.sum index 1673a583f..5c66265cd 100644 --- a/go.work.sum +++ b/go.work.sum @@ -514,6 +514,7 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:W github.com/aws/aws-lambda-go v1.13.3 h1:SuCy7H3NLyp+1Mrfp+m80jcbi9KYWAs9/BXwppwRDzY= github.com/aws/aws-sdk-go v1.44.236 h1:Ilbq/9B617BNjviTPjZrSbMxUkCb/1M7DqHO6sXOJTc= github.com/aws/aws-sdk-go v1.44.236/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.46.4 h1:48tKgtm9VMPkb6y7HuYlsfhQmoIRAsTEXTsWLVlty4M= github.com/aws/aws-sdk-go v1.46.4/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go-v2 v1.16.7/go.mod h1:6CpKuLXg2w7If3ABZCl/qZ6rEgwtjZTn4eAf4RcEyuw= github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pfHvDagGEp0M= diff --git a/kubetest2/go.mod b/kubetest2/go.mod index ce174c875..5c2dcce44 100644 --- a/kubetest2/go.mod +++ b/kubetest2/go.mod @@ -3,12 +3,12 @@ module github.com/aws/aws-k8s-tester/kubetest2 go 1.21 require ( - github.com/aws/aws-sdk-go-v2 v1.23.1 + github.com/aws/aws-sdk-go-v2 v1.24.0 github.com/aws/aws-sdk-go-v2/config v1.18.14 github.com/aws/aws-sdk-go-v2/service/cloudformation v1.40.1 github.com/aws/aws-sdk-go-v2/service/ec2 v1.137.1 github.com/aws/aws-sdk-go-v2/service/eks v1.33.2 - github.com/aws/smithy-go v1.17.0 + github.com/aws/smithy-go v1.19.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/octago/sflags v0.2.0 github.com/spf13/pflag v1.0.5 @@ -53,11 +53,13 @@ require ( github.com/alibabacloud-go/tea-xml v1.1.2 // indirect github.com/aliyun/credentials-go v1.2.3 // indirect github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d // indirect + github.com/aws/aws-sdk-go v1.46.4 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.13.14 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.23 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.4 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.3.30 // indirect + github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.32.0 // indirect github.com/aws/aws-sdk-go-v2/service/ecr v1.15.0 // indirect github.com/aws/aws-sdk-go-v2/service/ecrpublic v1.12.0 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.1 // indirect diff --git a/kubetest2/go.sum b/kubetest2/go.sum index 08e940a5d..5b4f4e4a0 100644 --- a/kubetest2/go.sum +++ b/kubetest2/go.sum @@ -293,12 +293,16 @@ github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= github.com/aws/aws-sdk-go v1.37.0/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.37.6/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= +github.com/aws/aws-sdk-go v1.46.4 h1:48tKgtm9VMPkb6y7HuYlsfhQmoIRAsTEXTsWLVlty4M= +github.com/aws/aws-sdk-go v1.46.4/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= github.com/aws/aws-sdk-go-v2 v1.7.1/go.mod h1:L5LuPC1ZgDr2xQS7AmIec/Jlc7O/Y1u2KxJyNVab250= github.com/aws/aws-sdk-go-v2 v1.14.0/go.mod h1:ZA3Y8V0LrlWj63MQAnRHgKf/5QB//LSZCPNWlWrNGLU= github.com/aws/aws-sdk-go-v2 v1.17.5/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= github.com/aws/aws-sdk-go-v2 v1.23.1 h1:qXaFsOOMA+HsZtX8WoCa+gJnbyW7qyFFBlPqvTSzbaI= github.com/aws/aws-sdk-go-v2 v1.23.1/go.mod h1:i1XDttT4rnf6vxc9AuskLc6s7XBee8rlLilKlc03uAA= +github.com/aws/aws-sdk-go-v2 v1.24.0 h1:890+mqQ+hTpNuw0gGP6/4akolQkSToDJgHfQE7AwGuk= +github.com/aws/aws-sdk-go-v2 v1.24.0/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= github.com/aws/aws-sdk-go-v2/config v1.5.0/go.mod h1:RWlPOAW3E3tbtNAqTwvSW54Of/yP3oiZXMI0xfUdjyA= github.com/aws/aws-sdk-go-v2/config v1.18.14 h1:rI47jCe0EzuJlAO5ptREe3LIBAyP5c7gR3wjyYVjuOM= github.com/aws/aws-sdk-go-v2/config v1.18.14/go.mod h1:0pI6JQBHKwd0JnwAZS3VCapLKMO++UL2BOkWwyyzTnA= @@ -312,15 +316,21 @@ github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.5/go.mod h1:2hXc8ooJqF2 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.29/go.mod h1:Dip3sIGv485+xerzVv24emnjX5Sg88utCL8fwGmCeWg= github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.4 h1:LAm3Ycm9HJfbSCd5I+wqC2S9Ej7FPrgr5CQoOljJZcE= github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.4/go.mod h1:xEhvbJcyUf/31yfGSQBe01fukXwXJ0gxDp7rLfymWE0= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9 h1:v+HbZaCGmOwnTTVS86Fleq0vPzOd7tnJGbFhP0stNLs= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9/go.mod h1:Xjqy+Nyj7VDLBtCMkQYOw1QYfAEZCVLrfI0ezve8wd4= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.3.0/go.mod h1:miRSv9l093jX/t/j+mBCaLqFHo9xKYzJ7DGm1BsGoJM= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.23/go.mod h1:mr6c4cHC+S/MMkrjtSlG4QA36kOznDep+0fga5L/fGQ= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.4 h1:4GV0kKZzUxiWxSVpn/9gwR0g21NF1Jsyduzo9rHgC/Q= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.4/go.mod h1:dYvTNAggxDZy6y1AF7YDwXsPuHFy/VNEpEI/2dWK9IU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9 h1:N94sVhRACtXyVcjXxrwK1SKFIJrA9pOJ5yu2eSHnmls= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9/go.mod h1:hqamLz7g1/4EJP+GH5NBhcUMLjW+gKLQabgyz6/7WAU= github.com/aws/aws-sdk-go-v2/internal/ini v1.1.1/go.mod h1:Zy8smImhTdOETZqfyn01iNOe0CNggVbPjCajyaz6Gvg= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.30 h1:IVx9L7YFhpPq0tTnGo8u8TpluFu7nAn9X3sUDMb11c0= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.30/go.mod h1:vsbq62AOBwQ1LJ/GWKFxX8beUEYeRp/Agitrxee2/qM= github.com/aws/aws-sdk-go-v2/service/cloudformation v1.40.1 h1:gcJzqFpFy6no/GvMkA8L8ld3Wt/MygcJzj5xmpwfJuM= github.com/aws/aws-sdk-go-v2/service/cloudformation v1.40.1/go.mod h1:swqr+Ayq2Mv+l32CXjtrYrdNqMu5d0aSKeM63ud7G8M= +github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.32.0 h1:f426fLs4hcrLuczLBqWf1Ob6FKJhISaR4e9Iw3Scr5A= +github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.32.0/go.mod h1:G63GKqSBLpBmO3tN1/PwM2NC65XvSd00zJWTZk202bc= github.com/aws/aws-sdk-go-v2/service/ec2 v1.137.1 h1:J/N4ydefXQZIwKBDPtvrhxrIuP/vaaYKnAsy3bKVIvU= github.com/aws/aws-sdk-go-v2/service/ec2 v1.137.1/go.mod h1:hrBzQzlQQRmiaeYRQPr0SdSx6fdqP+5YcGhb97LCt8M= github.com/aws/aws-sdk-go-v2/service/ecr v1.4.1/go.mod h1:FglZcyeiBqcbvyinl+n14aT/EWC7S1MIH+Gan2iizt0= @@ -352,6 +362,8 @@ github.com/aws/smithy-go v1.11.0/go.mod h1:3xHYmszWVx2c0kIwQeEVf9uSm4fYZt67FBJnw github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/aws/smithy-go v1.17.0 h1:wWJD7LX6PBV6etBUwO0zElG0nWN9rUhp0WdYeHSHAaI= github.com/aws/smithy-go v1.17.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= +github.com/aws/smithy-go v1.19.0 h1:KWFKQV80DpP3vJrrA9sVAHQ5gc2z8i4EzrLhLlWXcBM= +github.com/aws/smithy-go v1.19.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220228164355-396b2034c795 h1:IWeCJzU+IYaO2rVEBlGPTBfe90cmGXFTLdhUFlzDGsY= github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220228164355-396b2034c795/go.mod h1:8vJsEZ4iRqG+Vx6pKhWK6U00qcj0KC37IsfszMkY6UE= github.com/aybabtme/rgbterm v0.0.0-20170906152045-cc83f3b3ce59/go.mod h1:q/89r3U2H7sSsE2t6Kca0lfwTK8JdoNGS/yzM/4iH5I= diff --git a/kubetest2/internal/deployers/eksapi/deployer.go b/kubetest2/internal/deployers/eksapi/deployer.go index 0797b7371..a6c670004 100644 --- a/kubetest2/internal/deployers/eksapi/deployer.go +++ b/kubetest2/internal/deployers/eksapi/deployer.go @@ -2,15 +2,19 @@ package eksapi import ( "context" + "errors" "flag" "fmt" + "path" "path/filepath" "time" "github.com/aws/aws-k8s-tester/kubetest2/internal" "github.com/aws/aws-k8s-tester/kubetest2/internal/awssdk" + "github.com/aws/aws-k8s-tester/kubetest2/internal/metrics" "github.com/aws/aws-k8s-tester/kubetest2/internal/util" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/cloudwatch" "github.com/aws/aws-sdk-go-v2/service/eks" ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types" "github.com/octago/sflags/gen/gpflag" @@ -24,6 +28,8 @@ const DeployerName = "eksapi" const ResourcePrefix = "kubetest2-" + DeployerName +var DeployerMetricNamespace = path.Join("kubetest2", DeployerName) + // assert that deployer implements types.DeployerWithKubeconfig var _ types.DeployerWithKubeconfig = &deployer{} @@ -34,6 +40,7 @@ type deployer struct { awsConfig aws.Config _awsClients *awsClients + _metrics metrics.MetricRegistry infra *infra cluster *cluster @@ -55,6 +62,7 @@ type deployerOptions struct { UnmanagedNodes bool `flag:"unmanaged-nodes" desc:"Use an AutoScalingGroup instead of an EKS-managed nodegroup."` NodeReadyTimeout time.Duration `flag:"node-ready-timeout" desc:"Time to wait for all nodes to become ready"` GenerateSSHKey bool `flag:"generate-ssh-key" desc:"Generate an SSH key to use for tests. The generated key should not be used in production, as it will not have a passphrase."` + EmitMetrics bool `flag:"emit-metrics" desc:"Record and emit metrics to CloudWatch"` } // NewDeployer implements deployer.New for EKS using the EKS (and other AWS) API(s) directly (no cloudformation) @@ -116,6 +124,18 @@ func (d *deployer) awsClients() *awsClients { return d._awsClients } +func (d *deployer) metrics() metrics.MetricRegistry { + if d._metrics == nil { + if d.deployerOptions.EmitMetrics { + client := cloudwatch.NewFromConfig(d.awsConfig) + d._metrics = metrics.NewCloudWatchRegistry(client) + } else { + d._metrics = metrics.NewNoopMetricRegistry() + } + } + return d._metrics +} + func (d *deployer) Up() error { if err := d.verifyUpFlags(); err != nil { return fmt.Errorf("up flags are invalid: %v", err) @@ -225,18 +245,23 @@ func (d *deployer) IsUp() (up bool, err error) { } func (d *deployer) Down() error { - return deleteResources(d.awsClients(), d.resourceID) + deleteErr := deleteResources(d.awsClients(), d.metrics(), d.resourceID) + metricErr := d.metrics().Emit() + if deleteErr != nil || metricErr != nil { + return errors.Join(deleteErr, metricErr) + } + return nil } -func deleteResources(clients *awsClients, resourceID string) error { +func deleteResources(clients *awsClients, metrics metrics.MetricRegistry, resourceID string) error { if err := deleteNodegroup(clients, resourceID); err != nil { return err } if err := deleteCluster(clients, resourceID); err != nil { return err } - if err := deleteLeakedENIs(clients, resourceID); err != nil { + if err := deleteLeakedENIs(clients, metrics, resourceID); err != nil { return err } - return deleteInfrastructureStack(clients, resourceID) + return deleteInfrastructureStack(clients, metrics, resourceID) } diff --git a/kubetest2/internal/deployers/eksapi/infra.go b/kubetest2/internal/deployers/eksapi/infra.go index 3cff47c32..d89ac72ca 100644 --- a/kubetest2/internal/deployers/eksapi/infra.go +++ b/kubetest2/internal/deployers/eksapi/infra.go @@ -5,17 +5,20 @@ import ( _ "embed" "errors" "fmt" + "path" "strings" "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/cloudformation" cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types" + cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "k8s.io/klog" "github.com/aws/aws-k8s-tester/kubetest2/internal/deployers/eksapi/templates" + "github.com/aws/aws-k8s-tester/kubetest2/internal/metrics" ) const ( @@ -28,6 +31,20 @@ const ( // The tag is only added when --endpoint-url is passed to the deployer. const eksEndpointURLTag = "eks-endpoint-url" +var ( + infraMetricNamespace = path.Join(DeployerMetricNamespace, "infrastructure") + infraStackDeletionFailed = &metrics.MetricSpec{ + Namespace: infraMetricNamespace, + Metric: "StackDeletionFailed", + Unit: cloudwatchtypes.StandardUnitCount, + } + infraLeakedENIs = &metrics.MetricSpec{ + Namespace: infraMetricNamespace, + Metric: "LeakedENIs", + Unit: cloudwatchtypes.StandardUnitCount, + } +) + type infra struct { vpc string subnetsPublic []string @@ -130,7 +147,7 @@ func getInfrastructureStackResources(clients *awsClients, resourceID string) (*i return &infra, nil } -func deleteInfrastructureStack(clients *awsClients, resourceID string) error { +func deleteInfrastructureStack(clients *awsClients, metrics metrics.MetricRegistry, resourceID string) error { input := cloudformation.DeleteStackInput{ StackName: aws.String(resourceID), } @@ -152,7 +169,10 @@ func deleteInfrastructureStack(clients *awsClients, resourceID string) error { }, infraStackDeletionTimeout) if err != nil { - return fmt.Errorf("failed to wait for infrastructure stack deletion: %w", err) + // don't fail the overall test, the janitor can clean this up + klog.Warningf("failed to wait for infrastructure stack deletion: %v", err) + metrics.Record(infraStackDeletionFailed, 1, nil) + return nil } klog.Infof("deleted infrastructure stack: %s", resourceID) return nil @@ -160,7 +180,7 @@ func deleteInfrastructureStack(clients *awsClients, resourceID string) error { // deleteLeakedENIs deletes Elastic Network Interfaces that may have been allocated (and left behind) by the VPC CNI. // These leaked ENIs will prevent deletion of their associated subnets and security groups. -func deleteLeakedENIs(clients *awsClients, resourceID string) error { +func deleteLeakedENIs(clients *awsClients, metrics metrics.MetricRegistry, resourceID string) error { infra, err := getInfrastructureStackResources(clients, resourceID) if err != nil { var notFound *cloudformationtypes.StackNotFoundException @@ -205,5 +225,6 @@ func deleteLeakedENIs(clients *awsClients, resourceID string) error { } } klog.Infof("deleted %d leaked ENIs", deleted) + metrics.Record(infraLeakedENIs, float64(deleted), nil) return nil } diff --git a/kubetest2/internal/deployers/eksapi/janitor.go b/kubetest2/internal/deployers/eksapi/janitor.go index 8c2213e9c..25b874619 100644 --- a/kubetest2/internal/deployers/eksapi/janitor.go +++ b/kubetest2/internal/deployers/eksapi/janitor.go @@ -6,18 +6,27 @@ import ( "time" "github.com/aws/aws-k8s-tester/kubetest2/internal/awssdk" + "github.com/aws/aws-k8s-tester/kubetest2/internal/metrics" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/cloudformation" cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types" + "github.com/aws/aws-sdk-go-v2/service/cloudwatch" "k8s.io/klog/v2" ) -func NewJanitor(maxResourceAge time.Duration) *janitor { +func NewJanitor(maxResourceAge time.Duration, emitMetrics bool) *janitor { awsConfig := awssdk.NewConfig() + var metricRegistry metrics.MetricRegistry + if emitMetrics { + metricRegistry = metrics.NewCloudWatchRegistry(cloudwatch.NewFromConfig(awsConfig)) + } else { + metricRegistry = metrics.NewNoopMetricRegistry() + } return &janitor{ maxResourceAge: maxResourceAge, awsConfig: awsConfig, cfnClient: cloudformation.NewFromConfig(awsConfig), + metrics: metricRegistry, } } @@ -25,6 +34,7 @@ type janitor struct { maxResourceAge time.Duration awsConfig aws.Config cfnClient *cloudformation.Client + metrics metrics.MetricRegistry } func (j *janitor) Sweep(ctx context.Context) error { @@ -49,7 +59,7 @@ func (j *janitor) Sweep(ctx context.Context) error { } clients := j.awsClientsForStack(stack) klog.Infof("deleting resources (%v old): %s", resourceAge, resourceID) - if err := deleteResources(clients, resourceID); err != nil { + if err := deleteResources(clients, j.metrics, resourceID); err != nil { return err } } diff --git a/kubetest2/internal/metrics/cloudwatch.go b/kubetest2/internal/metrics/cloudwatch.go new file mode 100644 index 000000000..55997fff7 --- /dev/null +++ b/kubetest2/internal/metrics/cloudwatch.go @@ -0,0 +1,93 @@ +package metrics + +import ( + "context" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/service/cloudwatch" + "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" + "github.com/aws/aws-sdk-go/aws" + "k8s.io/klog" +) + +// NewCloudWatchRegistry creates a new metric registry that will emit values using the specified cloudwatch client +func NewCloudWatchRegistry(cw *cloudwatch.Client) MetricRegistry { + return &cloudwatchRegistry{ + cw: cw, + lock: &sync.Mutex{}, + dataByNamespace: make(map[string][]*cloudwatchMetricDatum), + } +} + +type cloudwatchRegistry struct { + cw *cloudwatch.Client + lock *sync.Mutex + dataByNamespace map[string][]*cloudwatchMetricDatum +} + +type cloudwatchMetricDatum struct { + spec *MetricSpec + value float64 + dimensions map[string]string + timestamp time.Time +} + +func (r *cloudwatchRegistry) Record(spec *MetricSpec, value float64, dimensions map[string]string) { + r.lock.Lock() + defer r.lock.Unlock() + r.dataByNamespace[spec.Namespace] = append(r.dataByNamespace[spec.Namespace], &cloudwatchMetricDatum{ + spec: spec, + value: value, + dimensions: dimensions, + timestamp: time.Now(), + }) +} + +func (r *cloudwatchRegistry) Emit() error { + r.lock.Lock() + defer r.lock.Unlock() + for namespace, data := range r.dataByNamespace { + for i := 0; i < len(data); { + var metricData []types.MetricDatum + // we can emit up to 1000 values per PutMetricData + for j := 0; j < 1000; j++ { + datum := data[i] + var dimensions []types.Dimension + for key, val := range datum.dimensions { + dimensions = append(dimensions, types.Dimension{ + Name: aws.String(key), + Value: aws.String(val), + }) + } + metricData = append(metricData, types.MetricDatum{ + MetricName: aws.String(datum.spec.Metric), + Value: aws.Float64(datum.value), + Dimensions: dimensions, + Timestamp: &datum.timestamp, + }) + i++ + } + _, err := r.cw.PutMetricData(context.TODO(), &cloudwatch.PutMetricDataInput{ + Namespace: aws.String(namespace), + MetricData: metricData, + }) + if err != nil { + return err + } + } + klog.Infof("emitted %d metrics to namespace: %s", len(data), namespace) + } + r.dataByNamespace = make(map[string][]*cloudwatchMetricDatum) + return nil +} + +func (r *cloudwatchRegistry) GetRegistered() int { + r.lock.Lock() + defer r.lock.Unlock() + registered := 0 + for _, data := range r.dataByNamespace { + registered += len(data) + } + return registered +} diff --git a/kubetest2/internal/metrics/noop.go b/kubetest2/internal/metrics/noop.go new file mode 100644 index 000000000..bae812e0e --- /dev/null +++ b/kubetest2/internal/metrics/noop.go @@ -0,0 +1,13 @@ +package metrics + +func NewNoopMetricRegistry() MetricRegistry { + return &noopRegistry{} +} + +type noopRegistry struct{} + +func (r *noopRegistry) Record(spec *MetricSpec, value float64, dimensions map[string]string) {} + +func (r *noopRegistry) Emit() error { + return nil +} diff --git a/kubetest2/internal/metrics/registry.go b/kubetest2/internal/metrics/registry.go new file mode 100644 index 000000000..e5a2feb59 --- /dev/null +++ b/kubetest2/internal/metrics/registry.go @@ -0,0 +1,18 @@ +package metrics + +import ( + "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" +) + +type MetricRegistry interface { + // Record adds a new metric value to the registry + Record(spec *MetricSpec, value float64, dimensions map[string]string) + // Emit sends all registered metric values to cloudwatch, emptying the registry + Emit() error +} + +type MetricSpec struct { + Namespace string + Metric string + Unit types.StandardUnit +} diff --git a/kubetest2/kubetest2-eksapi-janitor/main.go b/kubetest2/kubetest2-eksapi-janitor/main.go index 70c353442..55aa24e92 100644 --- a/kubetest2/kubetest2-eksapi-janitor/main.go +++ b/kubetest2/kubetest2-eksapi-janitor/main.go @@ -12,8 +12,10 @@ import ( func main() { var maxResourceAge time.Duration flag.DurationVar(&maxResourceAge, "max-resource-age", time.Hour*3, "Maximum resource age") + var emitMetrics bool + flag.BoolVar(&emitMetrics, "emit-metrics", false, "Send metrics to CloudWatch") flag.Parse() - j := eksapi.NewJanitor(maxResourceAge) + j := eksapi.NewJanitor(maxResourceAge, emitMetrics) if err := j.Sweep(context.Background()); err != nil { klog.Fatalf("failed to sweep resources: %v", err) }