From 755da9d02b7e964a8873788c6bf922f66161eeb2 Mon Sep 17 00:00:00 2001 From: galargh Date: Thu, 18 Apr 2024 18:17:46 +0200 Subject: [PATCH 1/8] feat: adjust pool dynamically based on demand --- .../control-plane/src/pool/pool.test.ts | 150 ++++++++++++- .../functions/control-plane/src/pool/pool.ts | 202 ++++++++++++------ 2 files changed, 282 insertions(+), 70 deletions(-) diff --git a/lambdas/functions/control-plane/src/pool/pool.test.ts b/lambdas/functions/control-plane/src/pool/pool.test.ts index fabcb47c23..dd300027f1 100644 --- a/lambdas/functions/control-plane/src/pool/pool.test.ts +++ b/lambdas/functions/control-plane/src/pool/pool.test.ts @@ -9,13 +9,18 @@ import { createRunners } from '../scale-runners/scale-up'; import { adjust } from './pool'; const mockOctokit = { - paginate: jest.fn(), + paginate: (f: any, o: any) => f(o), checks: { get: jest.fn() }, actions: { createRegistrationTokenForOrg: jest.fn(), + listJobsForWorkflowRunAttempt: jest.fn(), + listSelfHostedRunnersForOrg: jest.fn(), + listSelfHostedRunnersForRepo: jest.fn(), + listWorkflowRunsForRepo: jest.fn(), }, apps: { getOrgInstallation: jest.fn(), + listReposAccessibleToInstallation: jest.fn(), }, }; @@ -42,6 +47,7 @@ const cleanEnv = process.env; const ORG = 'my-org'; const MINIMUM_TIME_RUNNING = 15; +const LABELS = ['label1', 'label2']; const ec2InstancesRegistered = [ { @@ -79,7 +85,7 @@ const githubRunnersRegistered = [ os: 'linux', status: 'online', busy: false, - labels: [], + labels: LABELS, }, { id: 2, @@ -87,7 +93,7 @@ const githubRunnersRegistered = [ os: 'linux', status: 'online', busy: true, - labels: [], + labels: LABELS, }, { id: 3, @@ -95,7 +101,7 @@ const githubRunnersRegistered = [ os: 'linux', status: 'offline', busy: false, - labels: [], + labels: LABELS, }, { id: 3, @@ -103,7 +109,22 @@ const githubRunnersRegistered = [ os: 'linux', status: 'online', busy: false, - labels: [], + labels: LABELS, + }, +]; + +const githubReposAccessibleToInstallation = [ + { + owner: { + 
login: ORG, + }, + name: 'my-repo-1', + }, + { + owner: { + login: ORG, + }, + name: 'my-repo-2', }, ]; @@ -126,6 +147,7 @@ beforeEach(() => { process.env.INSTANCE_TARGET_CAPACITY_TYPE = 'spot'; process.env.RUNNER_OWNER = ORG; process.env.RUNNER_BOOT_TIME_IN_MINUTES = MINIMUM_TIME_RUNNING.toString(); + process.env.RUNNER_LABELS = LABELS.join(','); const mockTokenReturnValue = { data: { @@ -134,7 +156,15 @@ beforeEach(() => { }; mockOctokit.actions.createRegistrationTokenForOrg.mockImplementation(() => mockTokenReturnValue); - mockOctokit.paginate.mockImplementation(() => githubRunnersRegistered); + mockOctokit.actions.listSelfHostedRunnersForOrg.mockImplementation(() => githubRunnersRegistered); + + mockOctokit.actions.listSelfHostedRunnersForRepo.mockImplementation(() => githubRunnersRegistered); + + mockOctokit.apps.listReposAccessibleToInstallation.mockImplementation(() => githubReposAccessibleToInstallation); + + mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async () => []); + + mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => []); mockListRunners.mockImplementation(async () => ec2InstancesRegistered); @@ -171,7 +201,7 @@ describe('Test simple pool.', () => { await expect(await adjust({ poolSize: 3 })).resolves; expect(createRunners).toHaveBeenCalledTimes(1); expect(createRunners).toHaveBeenCalledWith( - expect.anything(), + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), expect.objectContaining({ numberOfRunners: 1 }), expect.anything(), ); @@ -206,7 +236,7 @@ describe('Test simple pool.', () => { // 2 idle + 1 booting = 3, top up with 2 to match a pool of 5 await expect(await adjust({ poolSize: 5 })).resolves; expect(createRunners).toHaveBeenCalledWith( - expect.anything(), + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), expect.objectContaining({ numberOfRunners: 2 }), expect.anything(), ); @@ -247,7 +277,7 @@ describe('Test simple pool.', () => { await expect(await 
adjust({ poolSize: 5 })).resolves; // 2 idle, top up with 3 to match a pool of 5 expect(createRunners).toHaveBeenCalledWith( - expect.anything(), + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), expect.objectContaining({ numberOfRunners: 3 }), expect.anything(), ); @@ -261,7 +291,7 @@ describe('Test simple pool.', () => { it('Should top up with fewer runners when there are idle prefixed runners', async () => { // Add prefixed runners to github - mockOctokit.paginate.mockImplementation(async () => [ + mockOctokit.actions.listSelfHostedRunnersForOrg.mockImplementation(async () => [ ...githubRunnersRegistered, { id: 5, @@ -301,10 +331,110 @@ describe('Test simple pool.', () => { await expect(await adjust({ poolSize: 5 })).resolves; // 2 idle, 2 prefixed idle top up with 1 to match a pool of 5 expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), + expect.objectContaining({ numberOfRunners: 1 }), + expect.anything(), + ); + }); + }); + + describe('With Negative Pool Size', () => { + // effective pool size is 2 (1 queued job with matching labels x 1 workflows x 2 accessible repositories) + it('Should not top up if there are fewer queued jobs than idle runners.', async () => { + mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 1, + attempt_number: 1, + }, + ]); + mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ + { + status: 'completed', + labels: LABELS, + }, + { + status: 'queued', + labels: LABELS, + }, + { + status: 'queued', + labels: [...LABELS, 'label3'], + }, + ]); + await expect(await adjust({ poolSize: -1 })).resolves; + expect(createRunners).not.toHaveBeenCalled(); + }); + // effective pool size is 8 (2 queued job with matching labels x 2 workflows x 2 accessible repositories) + it('Should top up if there are more queued jobs with 
matching labels than idle runners.', async () => { + mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 1, + attempt_number: 1, + }, + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 2, + attempt_number: 1, + }, + ]); + mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ + { + status: 'queued', + labels: LABELS, + }, + { + status: 'queued', + labels: LABELS, + }, + ]); + await expect(await adjust({ poolSize: -1 })).resolves; + expect(createRunners).toHaveBeenCalledTimes(1); + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), + expect.objectContaining({ numberOfRunners: 6 }), expect.anything(), + ); + }); + }); + + describe('With Runner Type Repo', () => { + it('Should top up the repository runners pool', async () => { + const runnerOwner = `${ORG}/my-repo-1`; + process.env.RUNNER_OWNER = runnerOwner; + await expect(await adjust({ poolSize: 3 })).resolves; + expect(createRunners).toHaveBeenCalledTimes(1); + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), expect.objectContaining({ numberOfRunners: 1 }), expect.anything(), ); }); }); + + describe('With Multiple Runner Owners', () => { + it('Should top up pools for all runner owners', async () => { + const runnerOwners = [`${ORG}/my-repo-1`, `${ORG}/my-repo-2`]; + process.env.RUNNER_OWNER = runnerOwners.join(','); + await expect(await adjust({ poolSize: 3 })).resolves; + expect(createRunners).toHaveBeenCalledTimes(2); + for (const runnerOwner of runnerOwners) { + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), + expect.objectContaining({ numberOfRunners: 1 }), + expect.anything(), + ); + } + }); + }); }); diff --git a/lambdas/functions/control-plane/src/pool/pool.ts 
b/lambdas/functions/control-plane/src/pool/pool.ts index a906928941..915d2f37f1 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -1,4 +1,5 @@ import { Octokit } from '@octokit/rest'; +import { GetResponseDataTypeFromEndpointMethod } from '@octokit/types'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; import yn from 'yn'; @@ -6,9 +7,12 @@ import { bootTimeExceeded, listEC2Runners } from '../aws/runners'; import { RunnerList } from '../aws/runners.d'; import { createGithubAppAuth, createGithubInstallationAuth, createOctoClient } from '../gh-auth/gh-auth'; import { createRunners } from '../scale-runners/scale-up'; +import { RunnerType } from '../aws/runners.d'; const logger = createChildLogger('pool'); +type Repository = GetResponseDataTypeFromEndpointMethod; + export interface PoolEvent { poolSize: number; } @@ -18,6 +22,13 @@ interface RunnerStatus { status: string; } +function canRunJob(workflowJobLabels: string[], runnerLabels: string[]): boolean { + runnerLabels = runnerLabels.map((label) => label.toLowerCase()); + const matchLabels = workflowJobLabels.every((wl) => runnerLabels.includes(wl.toLowerCase())); + const match = workflowJobLabels.length === 0 ? 
!matchLabels : matchLabels; + return match; +} + export async function adjust(event: PoolEvent): Promise { logger.info(`Checking current pool size against pool of size: ${event.poolSize}`); const runnerLabels = process.env.RUNNER_LABELS || ''; @@ -36,7 +47,7 @@ export async function adjust(event: PoolEvent): Promise { const launchTemplateName = process.env.LAUNCH_TEMPLATE_NAME; const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE; const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default - const runnerOwner = process.env.RUNNER_OWNER; + const runnerOwners = process.env.RUNNER_OWNER.split(','); const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false }); const onDemandFailoverOnError = process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS @@ -48,63 +59,123 @@ export async function adjust(event: PoolEvent): Promise { ghesApiUrl = `${ghesBaseUrl}/api/v3`; } - const installationId = await getInstallationId(ghesApiUrl, runnerOwner); - const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); - const githubInstallationClient = await createOctoClient(ghAuth.token, ghesApiUrl); - - // Get statusses of runners registed in GitHub - const runnerStatusses = await getGitHubRegisteredRunnnerStatusses( - githubInstallationClient, - runnerOwner, - runnerNamePrefix, - ); - - // Look up the managed ec2 runners in AWS, but running does not mean idle - const ec2runners = await listEC2Runners({ - environment, - runnerOwner, - runnerType: 'Org', - statuses: ['running'], - }); - - const numberOfRunnersInPool = calculatePooSize(ec2runners, runnerStatusses); - const topUp = event.poolSize - numberOfRunnersInPool; - - if (topUp > 0) { - logger.info(`The pool will be topped up with ${topUp} runners.`); - await createRunners( - { - ephemeral, - enableJitConfig, - ghesBaseUrl, - runnerLabels, - runnerGroup, - 
runnerOwner, - runnerNamePrefix, - runnerType: 'Org', - disableAutoUpdate: disableAutoUpdate, - ssmTokenPath, - ssmConfigPath, - }, - { - ec2instanceCriteria: { - instanceTypes, - targetCapacityType: instanceTargetTargetCapacityType, - maxSpotPrice: instanceMaxSpotPrice, - instanceAllocationStrategy: instanceAllocationStrategy, - }, - environment, - launchTemplateName, - subnets, - numberOfRunners: topUp, - amiIdSsmParameterName, - tracingEnabled, - onDemandFailoverOnError, - }, + for (const runnerOwner of runnerOwners) { + logger.info(`Checking ${runnerOwner}`); + + const [owner, repo] = runnerOwner.split('/'); + const runnerType = repo === undefined ? 'Org' : 'Repo'; + + const installationId = await getInstallationId(ghesApiUrl, owner); + const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); + const githubInstallationClient = await createOctoClient(ghAuth.token, ghesApiUrl); + + // Get statusses of runners registed in GitHub + const runnerStatusses = await getGitHubRegisteredRunnnerStatusses( githubInstallationClient, + runnerOwner, + runnerType, + runnerNamePrefix, ); - } else { - logger.info(`Pool will not be topped up. Found ${numberOfRunnersInPool} managed idle runners.`); + + // Look up the managed ec2 runners in AWS, but running does not mean idle + const ec2runners = await listEC2Runners({ + environment, + runnerOwner, + runnerType, + statuses: ['running'], + }); + + const numberOfRunnersInPool = calculatePooSize(ec2runners, runnerStatusses); + let topUp = 0; + if (event.poolSize >= 0) { + topUp = event.poolSize - numberOfRunnersInPool; + } else if (event.poolSize === -1) { + logger.info('Checking for queued jobs to determine pool size'); + let repos; + if (runnerType === 'Repo') { + repos = [repo]; + } else { + // @ts-ignore + // The types normalized by paginate are not correct because they only flatten .data, + // while in case of listReposAccessibleToInstallation they should normalize .repositories. 
+ const reposAccessibleToInstallation = (await githubInstallationClient.paginate( + githubInstallationClient.apps.listReposAccessibleToInstallation, + { + per_page: 100, + }, + )) as Repository[]; + repos = reposAccessibleToInstallation.filter((repo) => repo.owner.login === owner).map((repo) => repo.name); + } + const queuedWorkflowRuns = []; + for (const repo of repos) { + const workflowRuns = await githubInstallationClient.paginate( + githubInstallationClient.actions.listWorkflowRunsForRepo, + { + owner, + repo, + status: 'queued', + per_page: 100, + }, + ); + queuedWorkflowRuns.push(...workflowRuns); + } + const queuedJobs = []; + for (const workflowRun of queuedWorkflowRuns) { + const jobs = await githubInstallationClient.paginate( + githubInstallationClient.actions.listJobsForWorkflowRunAttempt, + { + owner: workflowRun.repository.owner.login, + repo: workflowRun.repository.name, + run_id: workflowRun.id, + attempt_number: workflowRun.run_attempt || 1, + per_page: 100, + }, + ); + queuedJobs.push(...jobs.filter((job) => job.status === 'queued')); + } + const numberOfQueuedJobs = queuedJobs.filter((job) => canRunJob(job.labels, runnerLabels.split(','))).length; + logger.info(`Found ${numberOfQueuedJobs} queued jobs`); + topUp = numberOfQueuedJobs - numberOfRunnersInPool; + } else { + logger.error(`Invalid pool size: ${event.poolSize}`); + } + + if (topUp > 0) { + logger.info(`The pool will be topped up with ${topUp} runners.`); + await createRunners( + { + ephemeral, + enableJitConfig, + ghesBaseUrl, + runnerLabels, + runnerGroup, + runnerOwner, + runnerNamePrefix, + runnerType, + disableAutoUpdate: disableAutoUpdate, + ssmTokenPath, + ssmConfigPath, + }, + { + ec2instanceCriteria: { + instanceTypes, + targetCapacityType: instanceTargetTargetCapacityType, + maxSpotPrice: instanceMaxSpotPrice, + instanceAllocationStrategy: instanceAllocationStrategy, + }, + environment, + launchTemplateName, + subnets, + numberOfRunners: topUp, + amiIdSsmParameterName, + 
tracingEnabled, + onDemandFailoverOnError, + }, + githubInstallationClient, + ); + } else { + logger.info(`Pool will not be topped up. Found ${numberOfRunnersInPool} managed idle runners.`); + } } } @@ -146,12 +217,23 @@ function calculatePooSize(ec2runners: RunnerList[], runnerStatus: Map> { - const runners = await ghClient.paginate(ghClient.actions.listSelfHostedRunnersForOrg, { - org: runnerOwner, - per_page: 100, - }); + let runners; + if (runnerType === 'Repo') { + const [owner, repo] = runnerOwner.split('/'); + runners = await ghClient.paginate(ghClient.actions.listSelfHostedRunnersForRepo, { + owner, + repo, + per_page: 100, + }); + } else { + runners = await ghClient.paginate(ghClient.actions.listSelfHostedRunnersForOrg, { + org: runnerOwner, + per_page: 100, + }); + } const runnerStatus = new Map(); for (const runner of runners) { runner.name = runnerNamePrefix ? runner.name.replace(runnerNamePrefix, '') : runner.name; From 17e986d977e7c2f5941f89b770053adcaeff6ab0 Mon Sep 17 00:00:00 2001 From: galargh Date: Sat, 20 Apr 2024 10:33:44 +0200 Subject: [PATCH 2/8] docs: update pool configuration description --- README.md | 4 ++-- modules/multi-runner/README.md | 2 +- modules/multi-runner/variables.tf | 4 ++-- modules/runners/README.md | 4 ++-- modules/runners/variables.tf | 4 ++-- variables.tf | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 651d3236b6..cadff60b09 100644 --- a/README.md +++ b/README.md @@ -181,11 +181,11 @@ Talk to the forestkeepers in the `runners-channel` on Slack. | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | [metrics\_namespace](#input\_metrics\_namespace) | The namespace for the metrics created by the module. 
Merics will only be created if explicit enabled. | `string` | `"GitHub Runners"` | no | | [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated, if not busy. | `number` | `null` | no | -| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. |
list(object({
schedule_expression = string
size = number
}))
| `[]` | no | +| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting pool size to -1 will adjust the pool based on the number of queued jobs. |
list(object({
schedule_expression = string
size = number
}))
| `[]` | no | | [pool\_lambda\_memory\_size](#input\_pool\_lambda\_memory\_size) | Memory size limit for scale-up lambda. | `number` | `512` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | -| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported. | `string` | `null` | no | +| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | | [queue\_encryption](#input\_queue\_encryption) | Configure how data on queues managed by the modules in ecrypted at REST. Options are encryped via SSE, non encrypted and via KMSS. By default encryptes via SSE is enabled. See for more details the Terraform `aws_sqs_queue` resource https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue. |
object({
kms_data_key_reuse_period_seconds = number
kms_master_key_id = string
sqs_managed_sse_enabled = bool
})
|
{
"kms_data_key_reuse_period_seconds": null,
"kms_master_key_id": null,
"sqs_managed_sse_enabled": true
}
| no | | [redrive\_build\_queue](#input\_redrive\_build\_queue) | Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries. |
object({
enabled = bool
maxReceiveCount = number
})
|
{
"enabled": false,
"maxReceiveCount": null
}
| no | diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index 09517e0316..b078cc3e90 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -147,7 +147,7 @@ module "multi-runner" { | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | [metrics\_namespace](#input\_metrics\_namespace) | The namespace for the metrics created by the module. Merics will only be created if explicit enabled. | `string` | `"GitHub Runners"` | no | -| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_runner\_on\_demand\_failover\_for\_errors "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
fifo: "Enable a FIFO queue to remain the order of events received by the webhook. Suggest to set to true for repo level runners."
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
size = number
})), [])
})

matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
fifo = optional(bool, false)
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | +| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: "(Optional) create the service linked role for spot instances that is required by the scale-up lambda."
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`."
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registration. JIT configuration will only be applied for ephemeral runners. By default JIT configuration is enabled for ephemeral runners and can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_runner\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config: "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be run before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be run after the GitHub Actions runner is installed on the EC2 instances"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting pool size to -1 will adjust the pool based on the number of queued jobs."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
fifo: "Enable a FIFO queue to retain the order of events received by the webhook. Suggest to set to true for repo level runners."
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
size = number
})), [])
})

matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
fifo = optional(bool, false)
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index 33556a8095..73d41c16b8 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -152,7 +152,7 @@ variable "multi_runner_config" { instance_types: "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux and Windows Server Core for win)." job_queue_retention_in_seconds: "The number of seconds the job is held in the queue before it is purged" minimum_running_time_in_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy." - pool_runner_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported." + pool_runner_owner: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." runner_additional_security_group_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi_runner_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi_runner_config, the additional security group(s) will be applied to the individual runner." runner_as_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored." 
runner_boot_time_in_minutes: "The minimum time for an EC2 runner to boot and register as a runner." @@ -177,7 +177,7 @@ variable "multi_runner_config" { idle_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle." runner_log_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details." block_device_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`." - pool_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1." + pool_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting pool size to -1 will adjust the pool based on the number of queued jobs." } matcherConfig: { labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`" diff --git a/modules/runners/README.md b/modules/runners/README.md index 5bfc4bb167..e195610dea 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -182,11 +182,11 @@ yarn run dist | [metadata\_options](#input\_metadata\_options) | Metadata options for the ec2 runner instances. 
By default, the module uses metadata tags for bootstrapping the runner, only disable `instance_metadata_tags` when using custom scripts for starting the runner. | `map(any)` |
{
"http_endpoint": "enabled",
"http_put_response_hop_limit": 1,
"http_tokens": "required",
"instance_metadata_tags": "enabled"
}
| no | | [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated if non busy. If not set the default is calculated based on the OS. | `number` | `null` | no | | [overrides](#input\_overrides) | This map provides the possibility to override some defaults. The following attributes are supported: `name_sg` overrides the `Name` tag for all security groups created by this module. `name_runner_agent_instance` overrides the `Name` tag for the ec2 instance defined in the auto launch configuration. `name_docker_machine_runners` overrides the `Name` tag spot instances created by the runner agent. | `map(string)` |
{
"name_runner": "",
"name_sg": ""
}
| no | -| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. |
list(object({
schedule_expression = string
size = number
}))
| `[]` | no | +| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting pool size to -1 will adjust the pool based on the number of queued jobs. |
list(object({
schedule_expression = string
size = number
}))
| `[]` | no | | [pool\_lambda\_memory\_size](#input\_pool\_lambda\_memory\_size) | Lambda Memory size limit in MB for pool lambda | `number` | `512` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | -| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported. | `string` | `null` | no | +| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | | [role\_path](#input\_role\_path) | The path that will be added to the role; if not set, the prefix will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. | `string` | `null` | no | diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 5c56917259..c8771116fc 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -526,7 +526,7 @@ variable "pool_lambda_memory_size" { } variable "pool_runner_owner" { - description = "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported." 
+ description = "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." type = string default = null } @@ -538,7 +538,7 @@ variable "pool_lambda_reserved_concurrent_executions" { } variable "pool_config" { - description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1." + description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting pool size to -1 will adjust the pool based on the number of queued jobs." type = list(object({ schedule_expression = string size = number diff --git a/variables.tf b/variables.tf index 51bd7d8e10..d658c0da83 100644 --- a/variables.tf +++ b/variables.tf @@ -665,7 +665,7 @@ variable "pool_lambda_timeout" { } variable "pool_runner_owner" { - description = "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported." + description = "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." type = string default = null } @@ -677,7 +677,7 @@ variable "pool_lambda_reserved_concurrent_executions" { } variable "pool_config" { - description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. 
For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1." + description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting the pool size to -1 will adjust the pool based on the number of queued jobs." type = list(object({ schedule_expression = string size = number From eada9ce826d7233912f7cc01dae6f2153dfc3b57 Mon Sep 17 00:00:00 2001 From: galargh Date: Sat, 20 Apr 2024 11:22:16 +0200 Subject: [PATCH 3/8] chore: make pool adhere to the linting conventions --- lambdas/functions/control-plane/src/pool/pool.test.ts | 2 +- lambdas/functions/control-plane/src/pool/pool.ts | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lambdas/functions/control-plane/src/pool/pool.test.ts b/lambdas/functions/control-plane/src/pool/pool.test.ts index dd300027f1..2f7860a3b1 100644 --- a/lambdas/functions/control-plane/src/pool/pool.test.ts +++ b/lambdas/functions/control-plane/src/pool/pool.test.ts @@ -9,7 +9,7 @@ import { createRunners } from '../scale-runners/scale-up'; import { adjust } from './pool'; const mockOctokit = { - paginate: (f: any, o: any) => f(o), + paginate: (f: (arg0: unknown) => unknown[], o: unknown) => f(o), checks: { get: jest.fn() }, actions: { createRegistrationTokenForOrg: jest.fn(), diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index 915d2f37f1..fd34cdc049 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -69,6 +69,8 @@ export async function adjust(event: PoolEvent): Promise { const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); const 
githubInstallationClient = await createOctoClient(ghAuth.token, ghesApiUrl); + githubInstallationClient.paginate; + // Get statusses of runners registed in GitHub const runnerStatusses = await getGitHubRegisteredRunnnerStatusses( githubInstallationClient, @@ -95,9 +97,9 @@ export async function adjust(event: PoolEvent): Promise { if (runnerType === 'Repo') { repos = [repo]; } else { - // @ts-ignore - // The types normalized by paginate are not correct because they only flatten .data, - // while in case of listReposAccessibleToInstallation they should normalize .repositories. + // @ts-expect-error The types normalized by paginate are not correct, + // because they only flatten .data, while in case of listReposAccessibleToInstallation, + // they should flatten .repositories. const reposAccessibleToInstallation = (await githubInstallationClient.paginate( githubInstallationClient.apps.listReposAccessibleToInstallation, { From f8bc290467001ecd7d3e5f628cbcbcf607691695 Mon Sep 17 00:00:00 2001 From: galargh Date: Sat, 20 Apr 2024 11:37:11 +0200 Subject: [PATCH 4/8] chore: improve pool statement test coverage --- .../control-plane/src/pool/pool.test.ts | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/lambdas/functions/control-plane/src/pool/pool.test.ts b/lambdas/functions/control-plane/src/pool/pool.test.ts index 2f7860a3b1..9144b40d2a 100644 --- a/lambdas/functions/control-plane/src/pool/pool.test.ts +++ b/lambdas/functions/control-plane/src/pool/pool.test.ts @@ -266,6 +266,11 @@ describe('Test simple pool.', () => { await expect(await adjust({ poolSize: 2 })).resolves; expect(createRunners).not.toHaveBeenCalled(); }); + + it('Should not top up if pool size is invalid.', async () => { + await expect(await adjust({ poolSize: -2 })).resolves; + expect(createRunners).not.toHaveBeenCalled(); + }); }); describe('With GHES', () => { @@ -368,6 +373,7 @@ describe('Test simple pool.', () => { await expect(await adjust({ poolSize: -1 })).resolves; 
expect(createRunners).not.toHaveBeenCalled(); }); + // effective pool size is 8 (2 queued job with matching labels x 2 workflows x 2 accessible repositories) it('Should top up if there are more queued jobs with matching labels than idle runners.', async () => { mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ @@ -420,6 +426,46 @@ describe('Test simple pool.', () => { expect.anything(), ); }); + + it('Should top up the repository runners pool dynamically', async () => { + const runnerOwner = `${ORG}/my-repo-1`; + process.env.RUNNER_OWNER = runnerOwner; + mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 1, + attempt_number: 1, + }, + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 2, + attempt_number: 1, + }, + ]); + mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ + { + status: 'queued', + labels: LABELS, + }, + { + status: 'queued', + labels: LABELS, + }, + ]); + await expect(await adjust({ poolSize: -1 })).resolves; + expect(createRunners).toHaveBeenCalledTimes(1); + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), + expect.objectContaining({ numberOfRunners: 2 }), + expect.anything(), + ); + }); }); describe('With Multiple Runner Owners', () => { From e9a9cab16154b0037376467aaec61c778a79347c Mon Sep 17 00:00:00 2001 From: galargh Date: Sat, 20 Apr 2024 11:52:51 +0200 Subject: [PATCH 5/8] chore: improve pool branch test coverage --- lambdas/functions/control-plane/src/pool/pool.test.ts | 5 +++++ lambdas/functions/control-plane/src/pool/pool.ts | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lambdas/functions/control-plane/src/pool/pool.test.ts b/lambdas/functions/control-plane/src/pool/pool.test.ts index 9144b40d2a..f1b80a85fe 100644 --- 
a/lambdas/functions/control-plane/src/pool/pool.test.ts +++ b/lambdas/functions/control-plane/src/pool/pool.test.ts @@ -268,6 +268,7 @@ describe('Test simple pool.', () => { }); it('Should not top up if pool size is invalid.', async () => { + process.env.RUNNER_LABELS = undefined; await expect(await adjust({ poolSize: -2 })).resolves; expect(createRunners).not.toHaveBeenCalled(); }); @@ -369,6 +370,10 @@ describe('Test simple pool.', () => { status: 'queued', labels: [...LABELS, 'label3'], }, + { + status: 'queued', + labels: [], + }, ]); await expect(await adjust({ poolSize: -1 })).resolves; expect(createRunners).not.toHaveBeenCalled(); diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index fd34cdc049..45662c08a9 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -69,8 +69,6 @@ export async function adjust(event: PoolEvent): Promise { const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); const githubInstallationClient = await createOctoClient(ghAuth.token, ghesApiUrl); - githubInstallationClient.paginate; - // Get statusses of runners registed in GitHub const runnerStatusses = await getGitHubRegisteredRunnnerStatusses( githubInstallationClient, From 6a7b0e7b70e28ce6c14f1432972920adc872bbf9 Mon Sep 17 00:00:00 2001 From: galargh Date: Sun, 9 Feb 2025 12:20:49 +0100 Subject: [PATCH 6/8] chore: apply suggestions from the code review --- README.md | 4 +- docs/configuration.md | 13 +- examples/ephemeral/main.tf | 2 +- .../control-plane/src/lambda.test.ts | 4 +- .../functions/control-plane/src/local-pool.ts | 2 +- .../functions/control-plane/src/modules.d.ts | 2 +- .../control-plane/src/pool/pool.test.ts | 237 +++++++++--------- .../functions/control-plane/src/pool/pool.ts | 216 +++++++++------- main.tf | 2 +- modules/multi-runner/README.md | 2 +- modules/multi-runner/runners.tf | 2 +- 
modules/multi-runner/variables.tf | 7 +- modules/runners/README.md | 4 +- modules/runners/pool.tf | 2 +- modules/runners/pool/README.md | 2 +- modules/runners/pool/main.tf | 5 +- modules/runners/pool/variables.tf | 5 +- modules/runners/variables.tf | 5 +- variables.tf | 5 +- 19 files changed, 287 insertions(+), 234 deletions(-) diff --git a/README.md b/README.md index 5934a199c2..592be80588 100644 --- a/README.md +++ b/README.md @@ -166,11 +166,11 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | | [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated, if not busy. | `number` | `null` | no | -| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting the pool size to -1 will adjust the pool based on the number of queued jobs. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). |
list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | +| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false). |
list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | | [pool\_lambda\_memory\_size](#input\_pool\_lambda\_memory\_size) | Memory size limit for scale-up lambda. | `number` | `512` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | -| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | +| [pool\_runner\_owners](#input\_pool\_runner\_owners) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | | [queue\_encryption](#input\_queue\_encryption) | Configure how data on queues managed by the modules in ecrypted at REST. Options are encryped via SSE, non encrypted and via KMSS. By default encryptes via SSE is enabled. See for more details the Terraform `aws_sqs_queue` resource https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue. |
object({
kms_data_key_reuse_period_seconds = number
kms_master_key_id = string
sqs_managed_sse_enabled = bool
})
|
{
"kms_data_key_reuse_period_seconds": null,
"kms_master_key_id": null,
"sqs_managed_sse_enabled": true
}
| no | | [redrive\_build\_queue](#input\_redrive\_build\_queue) | Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries. |
object({
enabled = bool
maxReceiveCount = number
})
|
{
"enabled": false,
"maxReceiveCount": null
}
| no | diff --git a/docs/configuration.md b/docs/configuration.md index c7f53121ed..236cba048e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -62,16 +62,17 @@ module "runners" { ## Pool -The module supports two options for keeping a pool of runners. One is via a pool which only supports org-level runners, the second option is [keeping runners idle](#idle-runners). +The module supports two options for keeping a pool of runners. One is via a pool, the second option is [keeping runners idle](#idle-runners). The pool is introduced in combination with the ephemeral runners and is primarily meant to ensure if any event is unexpectedly dropped and no runner was created, the pool can pick up the job. The pool is maintained by a lambda. Each time the lambda is triggered a check is performed to ensure the number of idle runners managed by the module matches the expected pool size. If not, the pool will be adjusted. Keep in mind that the scale down function is still active and will terminate instances that are detected as idle. ```hcl -pool_runner_owner = "my-org" # Org to which the runners are added +pool_runner_owners = "my-org" # Org to which the runners are added pool_config = [{ size = 20 # size of the pool schedule_expression = "cron(* * * * ? *)" # cron expression to trigger the adjustment of the pool schedule_expression_timezone = "Australia/Sydney" # optional time zone (defaults to UTC) + dynamic_pool_scaling_enabled = false # EXPERIMENTAL: if optionally enabled, the pool will be scaled dynamically, up to the pool size, based on the number of queued jobs (defaults to false) }] ``` @@ -334,3 +335,11 @@ resource "aws_iam_role_policy" "event_rule_firehose_role" { NOTE: By default, a runner AMI update requires a re-apply of this terraform config (the runner AMI ID is looked up by a terraform data source).
To avoid this, you can use `ami_id_ssm_parameter_name` to have the scale-up lambda dynamically lookup the runner AMI ID from an SSM parameter at instance launch time. Said SSM parameter is managed outside of this module (e.g. by a runner AMI build workflow). + +### Dynamic Pool Scaling + +This feature allows the pool to grow dynamically based on the number of queued jobs. It can be enabled by setting the `pool_config.dynamic_pool_scaling_enabled` to `true`. + +If the feature is enabled, the expected pool size will be calculated based on the number of queued jobs. The effective size of the pool will be set to the minimum of the number of queued jobs and the configured pool size. + +This feature is disabled by default because the retrieval of queued jobs may exhaust the GitHub API for larger deployments and cause rate limits. For larger deployments with a lot of frequent jobs having a permanent small pool available could be a better choice. diff --git a/examples/ephemeral/main.tf b/examples/ephemeral/main.tf index 25a79fb718..6bb8e71bce 100644 --- a/examples/ephemeral/main.tf +++ b/examples/ephemeral/main.tf @@ -63,7 +63,7 @@ module "runners" { enable_ephemeral_runners = true # # Example of simple pool usages - # pool_runner_owner = "YOUR_ORG" + # pool_runner_owners = "YOUR_ORG" # pool_config = [{ # size = 3 # schedule_expression = "cron(0/3 14 * * ? 
*)" # every 3 minutes between 14:00 and 15:00 diff --git a/lambdas/functions/control-plane/src/lambda.test.ts b/lambdas/functions/control-plane/src/lambda.test.ts index 69ccad17a0..6551bfd38e 100644 --- a/lambdas/functions/control-plane/src/lambda.test.ts +++ b/lambdas/functions/control-plane/src/lambda.test.ts @@ -152,7 +152,7 @@ describe('Adjust pool.', () => { resolve(); }); }); - await expect(adjustPool({ poolSize: 2 }, context)).resolves.not.toThrow(); + await expect(adjustPool({ poolSize: 2, dynamicPoolScalingEnabled: false }, context)).resolves.not.toThrow(); }); it('Handle error for adjusting pool.', async () => { @@ -160,7 +160,7 @@ describe('Adjust pool.', () => { const error = new Error('Handle error for adjusting pool.'); mock.mockRejectedValue(error); const logSpy = jest.spyOn(logger, 'error'); - await adjustPool({ poolSize: 0 }, context); + await adjustPool({ poolSize: 0, dynamicPoolScalingEnabled: false }, context); expect(logSpy).lastCalledWith(expect.stringContaining(error.message), expect.anything()); }); }); diff --git a/lambdas/functions/control-plane/src/local-pool.ts b/lambdas/functions/control-plane/src/local-pool.ts index ab8c74a1a0..f53ca23a05 100644 --- a/lambdas/functions/control-plane/src/local-pool.ts +++ b/lambdas/functions/control-plane/src/local-pool.ts @@ -1,7 +1,7 @@ import { adjust } from './pool/pool'; export function run(): void { - adjust({ poolSize: 1 }) + adjust({ poolSize: 1, dynamicPoolScalingEnabled: false }) .then() .catch((e) => { console.log(e); diff --git a/lambdas/functions/control-plane/src/modules.d.ts b/lambdas/functions/control-plane/src/modules.d.ts index 7570f29035..1667eacf01 100644 --- a/lambdas/functions/control-plane/src/modules.d.ts +++ b/lambdas/functions/control-plane/src/modules.d.ts @@ -14,7 +14,7 @@ declare namespace NodeJS { PARAMETER_GITHUB_APP_CLIENT_SECRET_NAME: string; PARAMETER_GITHUB_APP_ID_NAME: string; PARAMETER_GITHUB_APP_KEY_BASE64_NAME: string; - RUNNER_OWNER: string; + RUNNER_OWNERS: 
string; SCALE_DOWN_CONFIG: string; SSM_TOKEN_PATH: string; SSM_CLEANUP_CONFIG: string; diff --git a/lambdas/functions/control-plane/src/pool/pool.test.ts b/lambdas/functions/control-plane/src/pool/pool.test.ts index dfc1494841..c2ca9e566a 100644 --- a/lambdas/functions/control-plane/src/pool/pool.test.ts +++ b/lambdas/functions/control-plane/src/pool/pool.test.ts @@ -6,7 +6,7 @@ import nock from 'nock'; import { listEC2Runners } from '../aws/runners'; import * as ghAuth from '../github/auth'; import { createRunners, getGitHubEnterpriseApiUrl } from '../scale-runners/scale-up'; -import { adjust } from './pool'; +import * as pool from './pool'; const mockOctokit = { paginate: (f: (arg0: unknown) => unknown[], o: unknown) => f(o), @@ -35,6 +35,8 @@ jest.mock('./../aws/runners', () => ({ jest.mock('./../github/auth'); jest.mock('../scale-runners/scale-up'); +const { adjust, getNumberOfQueuedJobs } = pool; + const mocktokit = Octokit as jest.MockedClass; const mockedAppAuth = mocked(ghAuth.createGithubAppAuth, { shallow: false, @@ -42,6 +44,7 @@ const mockedAppAuth = mocked(ghAuth.createGithubAppAuth, { const mockedInstallationAuth = mocked(ghAuth.createGithubInstallationAuth, { shallow: false }); const mockCreateClient = mocked(ghAuth.createOctokitClient, { shallow: false }); const mockListRunners = mocked(listEC2Runners); +const mockGetNumberOfQueuedJobs = jest.spyOn(pool, 'getNumberOfQueuedJobs'); const cleanEnv = process.env; @@ -145,7 +148,7 @@ beforeEach(() => { process.env.SSM_TOKEN_PATH = '/github-action-runners/default/runners/tokens'; process.env.INSTANCE_TYPES = 'm5.large'; process.env.INSTANCE_TARGET_CAPACITY_TYPE = 'spot'; - process.env.RUNNER_OWNER = ORG; + process.env.RUNNER_OWNERS = ORG; process.env.RUNNER_BOOT_TIME_IN_MINUTES = MINIMUM_TIME_RUNNING.toString(); process.env.RUNNER_LABELS = LABELS.join(','); @@ -204,7 +207,7 @@ describe('Test simple pool.', () => { }); }); it('Top up pool with pool size 2 registered.', async () => { - await expect(await 
adjust({ poolSize: 3 })).resolves; + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).toHaveBeenCalledTimes(1); expect(createRunners).toHaveBeenCalledWith( expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), @@ -214,7 +217,7 @@ describe('Test simple pool.', () => { }); it('Should not top up if pool size is reached.', async () => { - await expect(await adjust({ poolSize: 1 })).resolves; + await expect(await adjust({ poolSize: 1, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).not.toHaveBeenCalled(); }); @@ -240,7 +243,7 @@ describe('Test simple pool.', () => { ]); // 2 idle + 1 booting = 3, top up with 2 to match a pool of 5 - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).toHaveBeenCalledWith( expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), expect.objectContaining({ numberOfRunners: 2 }), @@ -269,13 +272,13 @@ describe('Test simple pool.', () => { }, ]); - await expect(await adjust({ poolSize: 2 })).resolves; + await expect(await adjust({ poolSize: 2, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).not.toHaveBeenCalled(); }); it('Should not top up if pool size is invalid.', async () => { process.env.RUNNER_LABELS = undefined; - await expect(await adjust({ poolSize: -2 })).resolves; + await expect(await adjust({ poolSize: -2, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).not.toHaveBeenCalled(); }); }); @@ -289,7 +292,7 @@ describe('Test simple pool.', () => { }); it('Top up if the pool size is set to 5', async () => { - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; // 2 idle, top up with 3 to match a pool of 5 expect(createRunners).toHaveBeenCalledWith( expect.anything(), @@ -308,7 +311,7 @@ 
describe('Test simple pool.', () => { }); it('Top up if the pool size is set to 5', async () => { - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; // 2 idle, top up with 3 to match a pool of 5 expect(createRunners).toHaveBeenCalledWith( expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), @@ -362,7 +365,7 @@ describe('Test simple pool.', () => { }, ]); - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; // 2 idle, 2 prefixed idle top up with 1 to match a pool of 5 expect(createRunners).toHaveBeenCalledWith( expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), @@ -372,86 +375,53 @@ describe('Test simple pool.', () => { }); }); - describe('With Negative Pool Size', () => { - // effective pool size is 2 (1 queued job with matching labels x 1 workflows x 2 accessible repositories) - it('Should not top up if there are fewer queued jobs than idle runners.', async () => { - mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ - { - repository: { - owner: { login: owner }, - name: repo, - }, - id: 1, - attempt_number: 1, - }, - ]); - mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ - { - status: 'completed', - labels: LABELS, - }, - { - status: 'queued', - labels: LABELS, - }, - { - status: 'queued', - labels: [...LABELS, 'label3'], - }, - { - status: 'queued', - labels: [], - }, - ]); - await expect(await adjust({ poolSize: -1 })).resolves; - expect(createRunners).not.toHaveBeenCalled(); - }); + describe('With Dynamic Pool Scaling Enabled', () => { + const testCases = [ + { poolSize: 1, numberOfRunners: 0 }, + { poolSize: 2, numberOfRunners: 0 }, + { poolSize: 4, numberOfRunners: 2, numberOfQueuedJobs: 6 }, + { poolSize: 4, numberOfRunners: 2, numberOfQueuedJobs: 4 }, + { poolSize: 4, 
numberOfRunners: 1, numberOfQueuedJobs: 3 }, + { poolSize: 4, numberOfRunners: 0, numberOfQueuedJobs: 2 }, + { poolSize: 4, numberOfRunners: 0, numberOfQueuedJobs: 0 }, + ]; + + for (const { poolSize, numberOfRunners, numberOfQueuedJobs } of testCases) { + let message = numberOfRunners === 0 ? 'Should not top up' : `Should top up with ${numberOfRunners} runners`; + message += ` when the maximum pool size is ${poolSize}, and there are 2 idle runners`; + if (numberOfQueuedJobs !== undefined) { + message += ` and ${numberOfQueuedJobs} queued jobs`; + } - // effective pool size is 8 (2 queued job with matching labels x 2 workflows x 2 accessible repositories) - it('Should top up if there are more queued jobs with matching labels than idle runners.', async () => { - mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ - { - repository: { - owner: { login: owner }, - name: repo, - }, - id: 1, - attempt_number: 1, - }, - { - repository: { - owner: { login: owner }, - name: repo, - }, - id: 2, - attempt_number: 1, - }, - ]); - mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ - { - status: 'queued', - labels: LABELS, - }, - { - status: 'queued', - labels: LABELS, - }, - ]); - await expect(await adjust({ poolSize: -1 })).resolves; - expect(createRunners).toHaveBeenCalledTimes(1); - expect(createRunners).toHaveBeenCalledWith( - expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), - expect.objectContaining({ numberOfRunners: 6 }), - expect.anything(), - ); - }); + it(message, async () => { + if (numberOfQueuedJobs !== undefined) { + mockGetNumberOfQueuedJobs.mockReturnValueOnce(Promise.resolve(numberOfQueuedJobs)); + } + await expect(await adjust({ poolSize, dynamicPoolScalingEnabled: true })).resolves; + if (numberOfQueuedJobs === undefined) { + expect(mockGetNumberOfQueuedJobs).not.toHaveBeenCalled(); + } else { + expect(mockGetNumberOfQueuedJobs).toHaveBeenCalledTimes(1); + } + if 
(numberOfRunners === 0) { + expect(createRunners).not.toHaveBeenCalled(); + } else { + expect(createRunners).toHaveBeenCalledTimes(1); + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), + expect.objectContaining({ numberOfRunners }), + expect.anything(), + ); + } + }); + } }); describe('With Runner Type Repo', () => { it('Should top up the repository runners pool', async () => { const runnerOwner = `${ORG}/my-repo-1`; - process.env.RUNNER_OWNER = runnerOwner; - await expect(await adjust({ poolSize: 3 })).resolves; + process.env.RUNNER_OWNERS = runnerOwner; + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).toHaveBeenCalledTimes(1); expect(createRunners).toHaveBeenCalledWith( expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), @@ -462,40 +432,13 @@ describe('Test simple pool.', () => { it('Should top up the repository runners pool dynamically', async () => { const runnerOwner = `${ORG}/my-repo-1`; - process.env.RUNNER_OWNER = runnerOwner; - mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ - { - repository: { - owner: { login: owner }, - name: repo, - }, - id: 1, - attempt_number: 1, - }, - { - repository: { - owner: { login: owner }, - name: repo, - }, - id: 2, - attempt_number: 1, - }, - ]); - mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ - { - status: 'queued', - labels: LABELS, - }, - { - status: 'queued', - labels: LABELS, - }, - ]); - await expect(await adjust({ poolSize: -1 })).resolves; + process.env.RUNNER_OWNERS = runnerOwner; + mockGetNumberOfQueuedJobs.mockReturnValueOnce(Promise.resolve(3)); + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: true })).resolves; expect(createRunners).toHaveBeenCalledTimes(1); expect(createRunners).toHaveBeenCalledWith( expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), - 
expect.objectContaining({ numberOfRunners: 2 }), + expect.objectContaining({ numberOfRunners: 1 }), expect.anything(), ); }); @@ -504,8 +447,8 @@ describe('Test simple pool.', () => { describe('With Multiple Runner Owners', () => { it('Should top up pools for all runner owners', async () => { const runnerOwners = [`${ORG}/my-repo-1`, `${ORG}/my-repo-2`]; - process.env.RUNNER_OWNER = runnerOwners.join(','); - await expect(await adjust({ poolSize: 3 })).resolves; + process.env.RUNNER_OWNERS = runnerOwners.join(','); + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).toHaveBeenCalledTimes(2); for (const runnerOwner of runnerOwners) { expect(createRunners).toHaveBeenCalledWith( @@ -517,3 +460,63 @@ describe('Test simple pool.', () => { }); }); }); + +describe('Test number of queued jobs retrieval.', () => { + let ghClient: Octokit; + + beforeEach(() => { + ghClient = new mocktokit(); + + mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 1, + attempt_number: 1, + }, + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 2, + attempt_number: 1, + }, + ]); + + mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ + { + status: 'queued', + labels: LABELS, + }, + { + status: 'queued', + labels: LABELS, + }, + { + status: 'queued', + labels: [...LABELS, 'label3'], + }, + { + status: 'in_progress', + labels: LABELS, + }, + ]); + }); + + it('Should retrieve the number of queued jobs for the org', async () => { + // 2 repos x 2 workflow runs x 2 queued jobs with matching labels + await expect(getNumberOfQueuedJobs(ghClient, ORG, 'Org', LABELS.join(','))).resolves.toBe(8); + }); + + for (const githubRepo of githubReposAccessibleToInstallation) { + it(`Should retrieve the number of queued jobs for the repo ${githubRepo.name}`, async () => { + // 1 repo x 2 
workflow runs x 2 queued jobs with matching labels + await expect( + getNumberOfQueuedJobs(ghClient, `${githubRepo.owner.login}/${githubRepo.name}`, 'Repo', LABELS.join(',')), + ).resolves.toBe(4); + }); + } +}); diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index 7728e2ad8e..789d843846 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -14,6 +14,7 @@ type Repository = GetResponseDataTypeFromEndpointMethod export interface PoolEvent { poolSize: number; + dynamicPoolScalingEnabled: boolean; } interface RunnerStatus { @@ -21,6 +22,8 @@ interface RunnerStatus { status: string; } +// TODO: Move this function to a common module - a very similar function is +// defined in ../../webhook/src/runners/dispatch.ts function canRunJob(workflowJobLabels: string[], runnerLabels: string[]): boolean { runnerLabels = runnerLabels.map((label) => label.toLowerCase()); const matchLabels = workflowJobLabels.every((wl) => runnerLabels.includes(wl.toLowerCase())); @@ -45,7 +48,8 @@ export async function adjust(event: PoolEvent): Promise { const launchTemplateName = process.env.LAUNCH_TEMPLATE_NAME; const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE; const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default - const runnerOwners = process.env.RUNNER_OWNER.split(','); + // RUNNER_OWNERS is a comma-split list of owners, which might be either org or repo owners + const runnerOwners = process.env.RUNNER_OWNERS.split(','); const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false }); const onDemandFailoverOnError = process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS @@ -80,97 +84,66 @@ export async function adjust(event: PoolEvent): Promise { statuses: ['running'], }); - const numberOfRunnersInPool = 
calculatePooSize(ec2runners, runnerStatusses); - let topUp = 0; - if (event.poolSize >= 0) { - topUp = event.poolSize - numberOfRunnersInPool; - } else if (event.poolSize === -1) { - logger.info('Checking for queued jobs to determine pool size'); - let repos; - if (runnerType === 'Repo') { - repos = [repo]; - } else { - // @ts-expect-error The types normalized by paginate are not correct, - // because they only flatten .data, while in case of listReposAccessibleToInstallation, - // they should flatten .repositories. - const reposAccessibleToInstallation = (await githubInstallationClient.paginate( - githubInstallationClient.apps.listReposAccessibleToInstallation, - { - per_page: 100, - }, - )) as Repository[]; - repos = reposAccessibleToInstallation.filter((repo) => repo.owner.login === owner).map((repo) => repo.name); - } - const queuedWorkflowRuns = []; - for (const repo of repos) { - const workflowRuns = await githubInstallationClient.paginate( - githubInstallationClient.actions.listWorkflowRunsForRepo, - { - owner, - repo, - status: 'queued', - per_page: 100, - }, - ); - queuedWorkflowRuns.push(...workflowRuns); - } - const queuedJobs = []; - for (const workflowRun of queuedWorkflowRuns) { - const jobs = await githubInstallationClient.paginate( - githubInstallationClient.actions.listJobsForWorkflowRunAttempt, - { - owner: workflowRun.repository.owner.login, - repo: workflowRun.repository.name, - run_id: workflowRun.id, - attempt_number: workflowRun.run_attempt || 1, - per_page: 100, - }, - ); - queuedJobs.push(...jobs.filter((job) => job.status === 'queued')); - } - const numberOfQueuedJobs = queuedJobs.filter((job) => canRunJob(job.labels, runnerLabels.split(','))).length; - logger.info(`Found ${numberOfQueuedJobs} queued jobs`); - topUp = numberOfQueuedJobs - numberOfRunnersInPool; - } else { + if (event.poolSize <= 0) { logger.error(`Invalid pool size: ${event.poolSize}`); + return; } - if (topUp > 0) { - logger.info(`The pool will be topped up with ${topUp} 
runners.`); - await createRunners( - { - ephemeral, - enableJitConfig, - ghesBaseUrl, - runnerLabels, - runnerGroup, - runnerOwner, - runnerNamePrefix, - runnerType, - disableAutoUpdate: disableAutoUpdate, - ssmTokenPath, - ssmConfigPath, - }, - { - ec2instanceCriteria: { - instanceTypes, - targetCapacityType: instanceTargetCapacityType, - maxSpotPrice: instanceMaxSpotPrice, - instanceAllocationStrategy: instanceAllocationStrategy, - }, - environment, - launchTemplateName, - subnets, - numberOfRunners: topUp, - amiIdSsmParameterName, - tracingEnabled, - onDemandFailoverOnError, - }, - githubInstallationClient, - ); - } else { - logger.info(`Pool will not be topped up. Found ${numberOfRunnersInPool} managed idle runners.`); + const currentPoolSize = calculateCurrentPoolSize(ec2runners, runnerStatusses); + + if (currentPoolSize >= event.poolSize) { + logger.info(`Pool will not be topped up. Found ${currentPoolSize} managed idle runners.`); + return; + } + + const targetPoolSize = await calculateTargetPoolSize( + githubInstallationClient, + runnerOwner, + runnerType, + runnerLabels, + event.poolSize, + event.dynamicPoolScalingEnabled, + ); + + if (currentPoolSize >= targetPoolSize) { + logger.info(`Pool will not be topped up. 
Found ${currentPoolSize} managed idle runners.`); + return; } + + const topUp = targetPoolSize - currentPoolSize; + + logger.info(`The pool will be topped up with ${topUp} runners.`); + await createRunners( + { + ephemeral, + enableJitConfig, + ghesBaseUrl, + runnerLabels, + runnerGroup, + runnerOwner, + runnerNamePrefix, + runnerType, + disableAutoUpdate: disableAutoUpdate, + ssmTokenPath, + ssmConfigPath, + }, + { + ec2instanceCriteria: { + instanceTypes, + targetCapacityType: instanceTargetCapacityType, + maxSpotPrice: instanceMaxSpotPrice, + instanceAllocationStrategy: instanceAllocationStrategy, + }, + environment, + launchTemplateName, + subnets, + numberOfRunners: topUp, + amiIdSsmParameterName, + tracingEnabled, + onDemandFailoverOnError, + }, + githubInstallationClient, + ); } } @@ -185,7 +158,7 @@ async function getInstallationId(ghesApiUrl: string, org: string): Promise): number { +function calculateCurrentPoolSize(ec2runners: RunnerList[], runnerStatus: Map): number { // Runner should be considered idle if it is still booting, or is idle in GitHub let numberOfRunnersInPool = 0; for (const ec2Instance of ec2runners) { @@ -209,6 +182,71 @@ function calculatePooSize(ec2runners: RunnerList[], runnerStatus: Map { + if (!dynamicPoolScalingEnabled) { + return poolSize; + } + + // This call is made on the exports object to enable mocking it in tests + const numberOfQueuedJobs = await exports.getNumberOfQueuedJobs(ghClient, runnerOwner, runnerType, runnerLabels); + + return Math.min(poolSize, numberOfQueuedJobs); +} + +// This function is exported for testing purposes only +export async function getNumberOfQueuedJobs( + ghClient: Octokit, + runnerOwner: string, + runnerType: RunnerType, + runnerLabels: string, +): Promise { + logger.info('Checking for queued jobs to determine pool size'); + const [owner, repo] = runnerOwner.split('/'); + let repos; + if (runnerType === 'Repo') { + repos = [repo]; + } else { + // @ts-expect-error The types normalized by paginate 
are not correct, + // because they only flatten .data, while in case of listReposAccessibleToInstallation, + // they should flatten .repositories. + const reposAccessibleToInstallation = (await ghClient.paginate(ghClient.apps.listReposAccessibleToInstallation, { + per_page: 100, + })) as Repository[]; + repos = reposAccessibleToInstallation.filter((repo) => repo.owner.login === owner).map((repo) => repo.name); + } + const queuedWorkflowRuns = []; + for (const repo of repos) { + const workflowRuns = await ghClient.paginate(ghClient.actions.listWorkflowRunsForRepo, { + owner, + repo, + status: 'queued', + per_page: 100, + }); + queuedWorkflowRuns.push(...workflowRuns); + } + const queuedJobs = []; + for (const workflowRun of queuedWorkflowRuns) { + const jobs = await ghClient.paginate(ghClient.actions.listJobsForWorkflowRunAttempt, { + owner: workflowRun.repository.owner.login, + repo: workflowRun.repository.name, + run_id: workflowRun.id, + attempt_number: workflowRun.run_attempt || 1, + per_page: 100, + }); + queuedJobs.push(...jobs.filter((job) => job.status === 'queued')); + } + const numberOfQueuedJobs = queuedJobs.filter((job) => canRunJob(job.labels, runnerLabels.split(','))).length; + logger.info(`Found ${numberOfQueuedJobs} queued jobs`); + return numberOfQueuedJobs; +} + async function getGitHubRegisteredRunnnerStatusses( ghClient: Octokit, runnerOwner: string, diff --git a/main.tf b/main.tf index e4e18000fd..1ebbb2262b 100644 --- a/main.tf +++ b/main.tf @@ -265,7 +265,7 @@ module "runners" { pool_config = var.pool_config pool_lambda_memory_size = var.pool_lambda_memory_size pool_lambda_timeout = var.pool_lambda_timeout - pool_runner_owner = var.pool_runner_owner + pool_runner_owners = var.pool_runner_owners pool_lambda_reserved_concurrent_executions = var.pool_lambda_reserved_concurrent_executions ssm_housekeeper = var.runners_ssm_housekeeper diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index 6d5b74f1b4..0def32335a 100644 
--- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -148,7 +148,7 @@ module "multi-runner" { | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | -| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting the pool size to -1 will adjust the pool based on the number of queued jobs. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | +| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the service linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registration. JIT configuration will only be applied for ephemeral runners. By default JIT configuration is enabled for ephemeral runners and can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owners: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case your own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` disables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config: "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without triggering a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the instances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the rate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owners = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 8fe23d506d..96ed48fb0a 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -110,7 +110,7 @@ module "runners" { pool_config = each.value.runner_config.pool_config pool_lambda_timeout = var.pool_lambda_timeout - pool_runner_owner = each.value.runner_config.pool_runner_owner + pool_runner_owners = each.value.runner_config.pool_runner_owners pool_lambda_reserved_concurrent_executions = var.pool_lambda_reserved_concurrent_executions associate_public_ipv4_address = var.associate_public_ipv4_address diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index 63f059bf56..64b1594576 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -58,7 +58,7 @@ variable "multi_runner_config" { instance_types = list(string) job_queue_retention_in_seconds = optional(number, 86400) minimum_running_time_in_minutes = optional(number, null) - pool_runner_owner = optional(string, null) + pool_runner_owners = optional(string, null) runner_as_root = optional(bool, false) runner_boot_time_in_minutes = optional(number, 5) runner_disable_default_labels = optional(bool, false) @@ -110,6 +110,7 @@ variable "multi_runner_config" { volume_size = 30 }]) pool_config = optional(list(object({ + dynamic_pool_scaling_enabled = optional(bool, false) schedule_expression = string 
schedule_expression_timezone = optional(string) size = number @@ -162,7 +163,7 @@ variable "multi_runner_config" { instance_types: "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux and Windows Server Core for win)." job_queue_retention_in_seconds: "The number of seconds the job is held in the queue before it is purged" minimum_running_time_in_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy." - pool_runner_owner: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." + pool_runner_owners: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." runner_additional_security_group_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi_runner_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi_runner_config, the additional security group(s) will be applied to the individual runner." runner_as_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored." runner_boot_time_in_minutes: "The minimum time for an EC2 runner to boot and register as a runner." @@ -191,7 +192,7 @@ variable "multi_runner_config" { runner_log_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details." block_device_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`." job_retry: "Experimental! Can be removed / changed without trigger a major release. 
Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app." - pool_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting the pool size to -1 will adjust the pool based on the number of queued jobs. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)." + pool_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)." } matcherConfig: { labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`" diff --git a/modules/runners/README.md b/modules/runners/README.md index 2868dbc696..a17cd936a3 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -188,11 +188,11 @@ yarn run dist | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | | [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated if non busy. If not set the default is calculated based on the OS. | `number` | `null` | no | | [overrides](#input\_overrides) | This map provides the possibility to override some defaults. The following attributes are supported: `name_sg` overrides the `Name` tag for all security groups created by this module. `name_runner_agent_instance` overrides the `Name` tag for the ec2 instance defined in the auto launch configuration. `name_docker_machine_runners` overrides the `Name` tag spot instances created by the runner agent. | `map(string)` |
{
"name_runner": "",
"name_sg": ""
}
| no | -| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting the pool size to -1 will adjust the pool based on the number of queued jobs. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC). |
list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | +| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false). |
list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | | [pool\_lambda\_memory\_size](#input\_pool\_lambda\_memory\_size) | Lambda Memory size limit in MB for pool lambda | `number` | `512` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | -| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | +| [pool\_runner\_owners](#input\_pool\_runner\_owners) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | | [role\_path](#input\_role\_path) | The path that will be added to the role; if not set, the prefix will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. 
| `string` | `null` | no | diff --git a/modules/runners/pool.tf b/modules/runners/pool.tf index 16a13aeccd..0a17d5a0ff 100644 --- a/modules/runners/pool.tf +++ b/modules/runners/pool.tf @@ -46,7 +46,7 @@ module "pool" { launch_template = aws_launch_template.runner group_name = var.runner_group_name name_prefix = var.runner_name_prefix - pool_owner = var.pool_runner_owner + pool_owners = var.pool_runner_owners role = aws_iam_role.runner } subnet_ids = var.subnet_ids diff --git a/modules/runners/pool/README.md b/modules/runners/pool/README.md index cffad1213a..b7636612e2 100644 --- a/modules/runners/pool/README.md +++ b/modules/runners/pool/README.md @@ -48,7 +48,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [aws\_partition](#input\_aws\_partition) | (optional) partition for the arn if not 'aws' | `string` | `"aws"` | no | -| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owner = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | +| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owners = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | | [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | ## Outputs diff --git a/modules/runners/pool/main.tf b/modules/runners/pool/main.tf index 364d315439..e8c658863f 100644 --- a/modules/runners/pool/main.tf +++ b/modules/runners/pool/main.tf @@ -38,7 +38,7 @@ resource "aws_lambda_function" "pool" { RUNNER_LABELS = lower(join(",", var.config.runner.labels)) RUNNER_GROUP_NAME = var.config.runner.group_name RUNNER_NAME_PREFIX = var.config.runner.name_prefix - RUNNER_OWNER = var.config.runner.pool_owner + RUNNER_OWNERS = var.config.runner.pool_owners SSM_TOKEN_PATH = var.config.ssm_token_path SSM_CONFIG_PATH = var.config.ssm_config_path SUBNET_IDS = join(",", var.config.subnet_ids) @@ -214,7 +214,8 @@ resource "aws_scheduler_schedule" "pool" { arn = aws_lambda_function.pool.arn role_arn = aws_iam_role.scheduler.arn input = jsonencode({ - poolSize = each.value.size + poolSize = each.value.size + dynamic_pool_scaling_enabled = each.value.dynamic_pool_scaling_enabled }) } } diff --git a/modules/runners/pool/variables.tf b/modules/runners/pool/variables.tf index baf9746bbb..4f28544533 100644 --- a/modules/runners/pool/variables.tf +++ b/modules/runners/pool/variables.tf @@ -39,7 +39,7 @@ variable "config" { }) group_name = string name_prefix = string - pool_owner = string + pool_owners = string role = object({ arn = string }) @@ -50,6 +50,7 @@ variable "config" { instance_max_spot_price = string prefix = string pool = list(object({ + dynamic_pool_scaling_enabled = bool schedule_expression = string schedule_expression_timezone = string size = number @@ -83,5 +84,3 @@ variable "tracing_config" { }) default = {} } - - diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 55554c7870..61700b0c47 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -544,7 +544,7 @@ variable "pool_lambda_memory_size" { default = 512 } -variable "pool_runner_owner" { +variable "pool_runner_owners" { description = "The pool will deploy runners to the GitHub org/repo ID(s), set this 
value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." type = string default = null @@ -557,8 +557,9 @@ variable "pool_lambda_reserved_concurrent_executions" { } variable "pool_config" { - description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting the pool size to -1 will adjust the pool based on the number of queued jobs. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC)." + description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)." type = list(object({ + dynamic_pool_scaling_enabled = optional(bool, false) schedule_expression = string schedule_expression_timezone = optional(string) size = number diff --git a/variables.tf b/variables.tf index ee168fb55b..936205b6cc 100644 --- a/variables.tf +++ b/variables.tf @@ -690,7 +690,7 @@ variable "pool_lambda_timeout" { default = 60 } -variable "pool_runner_owner" { +variable "pool_runner_owners" { description = "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." 
type = string default = null @@ -703,8 +703,9 @@ variable "pool_lambda_reserved_concurrent_executions" { } variable "pool_config" { - description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Setting the pool size to -1 will adjust the pool based on the number of queued jobs. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)." + description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)." 
type = list(object({ + dynamic_pool_scaling_enabled = optional(bool, false) schedule_expression = string schedule_expression_timezone = optional(string) size = number From 317a48fe697052cadb92bbaf259d52cb1832437f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 23 Feb 2025 17:10:27 +0000 Subject: [PATCH 7/8] docs: auto update terraform docs --- modules/multi-runner/README.md | 2 +- modules/runners/README.md | 2 +- modules/runners/pool/README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index 0def32335a..8e4cb714eb 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -148,7 +148,7 @@ module "multi-runner" { | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | -| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owners = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | +| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the service linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registration. JIT configuration will only be applied for ephemeral runners. By default JIT configuration is enabled for ephemeral runners and can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-built AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owners: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case your own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` disables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config: "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be run before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be run after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be run in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be run in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without triggering a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the instances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the rate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)."
}
matcherConfig: {
labelMatchers: "The list of lists of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owners = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | diff --git a/modules/runners/README.md b/modules/runners/README.md index a17cd936a3..c7802633e6 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -188,7 +188,7 @@ yarn run dist | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | | [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated if not busy. If not set the default is calculated based on the OS. | `number` | `null` | no | | [overrides](#input\_overrides) | This map provides the possibility to override some defaults. The following attributes are supported: `name_sg` overrides the `Name` tag for all security groups created by this module. `name_runner_agent_instance` overrides the `Name` tag for the ec2 instance defined in the auto launch configuration. `name_docker_machine_runners` overrides the `Name` tag for spot instances created by the runner agent. | `map(string)` |
{
"name_runner": "",
"name_sg": ""
}
| no | -| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC). |
list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | +| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false). |
list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | | [pool\_lambda\_memory\_size](#input\_pool\_lambda\_memory\_size) | Lambda Memory size limit in MB for pool lambda | `number` | `512` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | diff --git a/modules/runners/pool/README.md b/modules/runners/pool/README.md index b7636612e2..221760f568 100644 --- a/modules/runners/pool/README.md +++ b/modules/runners/pool/README.md @@ -48,7 +48,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [aws\_partition](#input\_aws\_partition) | (optional) partition for the arn if not 'aws' | `string` | `"aws"` | no | -| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owners = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | +| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owners = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
dynamic_pool_scaling_enabled = bool
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | | [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | ## Outputs From 86344e4dfb1190969baf05d06d54bef332baf89d Mon Sep 17 00:00:00 2001 From: galargh Date: Sun, 23 Feb 2025 22:23:58 +0100 Subject: [PATCH 8/8] chore: reduce cyclomatic complexity of pool implementation --- lambdas/functions/control-plane/src/pool/pool.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index 789d843846..41ddf0c827 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -27,8 +27,7 @@ interface RunnerStatus { function canRunJob(workflowJobLabels: string[], runnerLabels: string[]): boolean { runnerLabels = runnerLabels.map((label) => label.toLowerCase()); const matchLabels = workflowJobLabels.every((wl) => runnerLabels.includes(wl.toLowerCase())); - const match = workflowJobLabels.length === 0 ? !matchLabels : matchLabels; - return match; + return workflowJobLabels.length !== 0 && matchLabels; } export async function adjust(event: PoolEvent): Promise { @@ -52,9 +51,7 @@ export async function adjust(event: PoolEvent): Promise { const runnerOwners = process.env.RUNNER_OWNERS.split(','); const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false }); - const onDemandFailoverOnError = process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS - ? (JSON.parse(process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS) as [string]) - : []; + const onDemandFailoverOnError: string[] = JSON.parse(process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS ?? '[]'); const { ghesApiUrl, ghesBaseUrl } = getGitHubEnterpriseApiUrl();