diff --git a/README.md b/README.md index c9c247f5d3..592be80588 100644 --- a/README.md +++ b/README.md @@ -166,11 +166,11 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | | [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated, if not busy. | `number` | `null` | no | -| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). |
list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | +| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false). |
list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | | [pool\_lambda\_memory\_size](#input\_pool\_lambda\_memory\_size) | Memory size limit for scale-up lambda. | `number` | `512` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | -| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported. | `string` | `null` | no | +| [pool\_runner\_owners](#input\_pool\_runner\_owners) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | | [queue\_encryption](#input\_queue\_encryption) | Configure how data on queues managed by the modules in ecrypted at REST. Options are encryped via SSE, non encrypted and via KMSS. By default encryptes via SSE is enabled. See for more details the Terraform `aws_sqs_queue` resource https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue. |
object({
kms_data_key_reuse_period_seconds = number
kms_master_key_id = string
sqs_managed_sse_enabled = bool
})
|
{
"kms_data_key_reuse_period_seconds": null,
"kms_master_key_id": null,
"sqs_managed_sse_enabled": true
}
| no | | [redrive\_build\_queue](#input\_redrive\_build\_queue) | Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries. |
object({
enabled = bool
maxReceiveCount = number
})
|
{
"enabled": false,
"maxReceiveCount": null
}
| no | diff --git a/docs/configuration.md b/docs/configuration.md index c7f53121ed..236cba048e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -62,16 +62,17 @@ module "runners" { ## Pool -The module supports two options for keeping a pool of runners. One is via a pool which only supports org-level runners, the second option is [keeping runners idle](#idle-runners). +The module supports two options for keeping a pool of runners. One is via a pool, the second option is [keeping runners idle](#idle-runners). The pool is introduced in combination with the ephemeral runners and is primarily meant to ensure if any event is unexpectedly dropped and no runner was created, the pool can pick up the job. The pool is maintained by a lambda. Each time the lambda is triggered a check is performed to ensure the number of idle runners managed by the module matches the expected pool size. If not, the pool will be adjusted. Keep in mind that the scale down function is still active and will terminate instances that are detected as idle. ```hcl -pool_runner_owner = "my-org" # Org to which the runners are added +pool_runner_owners = "my-org" # Org to which the runners are added pool_config = [{ size = 20 # size of the pool schedule_expression = "cron(* * * * ? *)" # cron expression to trigger the adjustment of the pool schedule_expression_timezone = "Australia/Sydney" # optional time zone (defaults to UTC) + dynamic_pool_scaling_enabled = false # EXPERIMENTAL: if optionally enabled, the pool will be scaled dynamically, up to the pool size, based on the number of queued jobs (defaults to false) }] ``` @@ -334,3 +335,11 @@ resource "aws_iam_role_policy" "event_rule_firehose_role" { NOTE: By default, a runner AMI update requires a re-apply of this terraform config (the runner AMI ID is looked up by a terraform data source). 
To avoid this, you can use `ami_id_ssm_parameter_name` to have the scale-up lambda dynamically lookup the runner AMI ID from an SSM parameter at instance launch time. Said SSM parameter is managed outside of this module (e.g. by a runner AMI build workflow). + +### Dynamic Pool Scaling + +This feature allows the pool to grow dynamically based on the number of queued jobs. It can be enabled by setting the `pool_config.dynamic_pool_scaling_enabled` to `true`. + +If the feature is enabled, the expected pool size will be calculated based on the number of queued jobs. The effective size of the pool will be set to the minimum of the number of queued jobs and the configured pool size. + +This feature is disabled by default because the retrieval of queued jobs may exhaust the GitHub API for larger deployments and cause rate limits. For larger deployments with a lot of frequent jobs, having a permanent small pool available could be a better choice. diff --git a/examples/ephemeral/main.tf b/examples/ephemeral/main.tf index 25a79fb718..6bb8e71bce 100644 --- a/examples/ephemeral/main.tf +++ b/examples/ephemeral/main.tf @@ -63,7 +63,7 @@ module "runners" { enable_ephemeral_runners = true # # Example of simple pool usages - # pool_runner_owner = "YOUR_ORG" + # pool_runner_owners = "YOUR_ORG" # pool_config = [{ # size = 3 # schedule_expression = "cron(0/3 14 * * ? 
*)" # every 3 minutes between 14:00 and 15:00 diff --git a/lambdas/functions/control-plane/src/lambda.test.ts b/lambdas/functions/control-plane/src/lambda.test.ts index 69ccad17a0..6551bfd38e 100644 --- a/lambdas/functions/control-plane/src/lambda.test.ts +++ b/lambdas/functions/control-plane/src/lambda.test.ts @@ -152,7 +152,7 @@ describe('Adjust pool.', () => { resolve(); }); }); - await expect(adjustPool({ poolSize: 2 }, context)).resolves.not.toThrow(); + await expect(adjustPool({ poolSize: 2, dynamicPoolScalingEnabled: false }, context)).resolves.not.toThrow(); }); it('Handle error for adjusting pool.', async () => { @@ -160,7 +160,7 @@ describe('Adjust pool.', () => { const error = new Error('Handle error for adjusting pool.'); mock.mockRejectedValue(error); const logSpy = jest.spyOn(logger, 'error'); - await adjustPool({ poolSize: 0 }, context); + await adjustPool({ poolSize: 0, dynamicPoolScalingEnabled: false }, context); expect(logSpy).lastCalledWith(expect.stringContaining(error.message), expect.anything()); }); }); diff --git a/lambdas/functions/control-plane/src/local-pool.ts b/lambdas/functions/control-plane/src/local-pool.ts index ab8c74a1a0..f53ca23a05 100644 --- a/lambdas/functions/control-plane/src/local-pool.ts +++ b/lambdas/functions/control-plane/src/local-pool.ts @@ -1,7 +1,7 @@ import { adjust } from './pool/pool'; export function run(): void { - adjust({ poolSize: 1 }) + adjust({ poolSize: 1, dynamicPoolScalingEnabled: false }) .then() .catch((e) => { console.log(e); diff --git a/lambdas/functions/control-plane/src/modules.d.ts b/lambdas/functions/control-plane/src/modules.d.ts index 7570f29035..1667eacf01 100644 --- a/lambdas/functions/control-plane/src/modules.d.ts +++ b/lambdas/functions/control-plane/src/modules.d.ts @@ -14,7 +14,7 @@ declare namespace NodeJS { PARAMETER_GITHUB_APP_CLIENT_SECRET_NAME: string; PARAMETER_GITHUB_APP_ID_NAME: string; PARAMETER_GITHUB_APP_KEY_BASE64_NAME: string; - RUNNER_OWNER: string; + RUNNER_OWNERS: 
string; SCALE_DOWN_CONFIG: string; SSM_TOKEN_PATH: string; SSM_CLEANUP_CONFIG: string; diff --git a/lambdas/functions/control-plane/src/pool/pool.test.ts b/lambdas/functions/control-plane/src/pool/pool.test.ts index 253d63300b..c2ca9e566a 100644 --- a/lambdas/functions/control-plane/src/pool/pool.test.ts +++ b/lambdas/functions/control-plane/src/pool/pool.test.ts @@ -6,16 +6,21 @@ import nock from 'nock'; import { listEC2Runners } from '../aws/runners'; import * as ghAuth from '../github/auth'; import { createRunners, getGitHubEnterpriseApiUrl } from '../scale-runners/scale-up'; -import { adjust } from './pool'; +import * as pool from './pool'; const mockOctokit = { - paginate: jest.fn(), + paginate: (f: (arg0: unknown) => unknown[], o: unknown) => f(o), checks: { get: jest.fn() }, actions: { createRegistrationTokenForOrg: jest.fn(), + listJobsForWorkflowRunAttempt: jest.fn(), + listSelfHostedRunnersForOrg: jest.fn(), + listSelfHostedRunnersForRepo: jest.fn(), + listWorkflowRunsForRepo: jest.fn(), }, apps: { getOrgInstallation: jest.fn(), + listReposAccessibleToInstallation: jest.fn(), }, }; @@ -30,6 +35,8 @@ jest.mock('./../aws/runners', () => ({ jest.mock('./../github/auth'); jest.mock('../scale-runners/scale-up'); +const { adjust, getNumberOfQueuedJobs } = pool; + const mocktokit = Octokit as jest.MockedClass; const mockedAppAuth = mocked(ghAuth.createGithubAppAuth, { shallow: false, @@ -37,11 +44,13 @@ const mockedAppAuth = mocked(ghAuth.createGithubAppAuth, { const mockedInstallationAuth = mocked(ghAuth.createGithubInstallationAuth, { shallow: false }); const mockCreateClient = mocked(ghAuth.createOctokitClient, { shallow: false }); const mockListRunners = mocked(listEC2Runners); +const mockGetNumberOfQueuedJobs = jest.spyOn(pool, 'getNumberOfQueuedJobs'); const cleanEnv = process.env; const ORG = 'my-org'; const MINIMUM_TIME_RUNNING = 15; +const LABELS = ['label1', 'label2']; const ec2InstancesRegistered = [ { @@ -79,7 +88,7 @@ const githubRunnersRegistered = 
[ os: 'linux', status: 'online', busy: false, - labels: [], + labels: LABELS, }, { id: 2, @@ -87,7 +96,7 @@ const githubRunnersRegistered = [ os: 'linux', status: 'online', busy: true, - labels: [], + labels: LABELS, }, { id: 3, @@ -95,7 +104,7 @@ const githubRunnersRegistered = [ os: 'linux', status: 'offline', busy: false, - labels: [], + labels: LABELS, }, { id: 3, @@ -103,7 +112,22 @@ const githubRunnersRegistered = [ os: 'linux', status: 'online', busy: false, - labels: [], + labels: LABELS, + }, +]; + +const githubReposAccessibleToInstallation = [ + { + owner: { + login: ORG, + }, + name: 'my-repo-1', + }, + { + owner: { + login: ORG, + }, + name: 'my-repo-2', }, ]; @@ -124,8 +148,9 @@ beforeEach(() => { process.env.SSM_TOKEN_PATH = '/github-action-runners/default/runners/tokens'; process.env.INSTANCE_TYPES = 'm5.large'; process.env.INSTANCE_TARGET_CAPACITY_TYPE = 'spot'; - process.env.RUNNER_OWNER = ORG; + process.env.RUNNER_OWNERS = ORG; process.env.RUNNER_BOOT_TIME_IN_MINUTES = MINIMUM_TIME_RUNNING.toString(); + process.env.RUNNER_LABELS = LABELS.join(','); const mockTokenReturnValue = { data: { @@ -134,7 +159,15 @@ beforeEach(() => { }; mockOctokit.actions.createRegistrationTokenForOrg.mockImplementation(() => mockTokenReturnValue); - mockOctokit.paginate.mockImplementation(() => githubRunnersRegistered); + mockOctokit.actions.listSelfHostedRunnersForOrg.mockImplementation(() => githubRunnersRegistered); + + mockOctokit.actions.listSelfHostedRunnersForRepo.mockImplementation(() => githubRunnersRegistered); + + mockOctokit.apps.listReposAccessibleToInstallation.mockImplementation(() => githubReposAccessibleToInstallation); + + mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async () => []); + + mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => []); mockListRunners.mockImplementation(async () => ec2InstancesRegistered); @@ -174,17 +207,17 @@ describe('Test simple pool.', () => { }); }); it('Top up pool with pool 
size 2 registered.', async () => { - await expect(await adjust({ poolSize: 3 })).resolves; + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).toHaveBeenCalledTimes(1); expect(createRunners).toHaveBeenCalledWith( - expect.anything(), + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), expect.objectContaining({ numberOfRunners: 1 }), expect.anything(), ); }); it('Should not top up if pool size is reached.', async () => { - await expect(await adjust({ poolSize: 1 })).resolves; + await expect(await adjust({ poolSize: 1, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).not.toHaveBeenCalled(); }); @@ -210,9 +243,9 @@ describe('Test simple pool.', () => { ]); // 2 idle + 1 booting = 3, top up with 2 to match a pool of 5 - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).toHaveBeenCalledWith( - expect.anything(), + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), expect.objectContaining({ numberOfRunners: 2 }), expect.anything(), ); @@ -239,7 +272,13 @@ describe('Test simple pool.', () => { }, ]); - await expect(await adjust({ poolSize: 2 })).resolves; + await expect(await adjust({ poolSize: 2, dynamicPoolScalingEnabled: false })).resolves; + expect(createRunners).not.toHaveBeenCalled(); + }); + + it('Should not top up if pool size is invalid.', async () => { + process.env.RUNNER_LABELS = undefined; + await expect(await adjust({ poolSize: -2, dynamicPoolScalingEnabled: false })).resolves; expect(createRunners).not.toHaveBeenCalled(); }); }); @@ -253,7 +292,7 @@ describe('Test simple pool.', () => { }); it('Top up if the pool size is set to 5', async () => { - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; // 2 idle, top up with 3 to match a pool of 5 
expect(createRunners).toHaveBeenCalledWith( expect.anything(), @@ -272,10 +311,10 @@ describe('Test simple pool.', () => { }); it('Top up if the pool size is set to 5', async () => { - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; // 2 idle, top up with 3 to match a pool of 5 expect(createRunners).toHaveBeenCalledWith( - expect.anything(), + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), expect.objectContaining({ numberOfRunners: 3 }), expect.anything(), ); @@ -289,7 +328,7 @@ describe('Test simple pool.', () => { it('Should top up with fewer runners when there are idle prefixed runners', async () => { // Add prefixed runners to github - mockOctokit.paginate.mockImplementation(async () => [ + mockOctokit.actions.listSelfHostedRunnersForOrg.mockImplementation(async () => [ ...githubRunnersRegistered, { id: 5, @@ -326,13 +365,158 @@ describe('Test simple pool.', () => { }, ]); - await expect(await adjust({ poolSize: 5 })).resolves; + await expect(await adjust({ poolSize: 5, dynamicPoolScalingEnabled: false })).resolves; // 2 idle, 2 prefixed idle top up with 1 to match a pool of 5 expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), + expect.objectContaining({ numberOfRunners: 1 }), expect.anything(), + ); + }); + }); + + describe('With Dynamic Pool Scaling Enabled', () => { + const testCases = [ + { poolSize: 1, numberOfRunners: 0 }, + { poolSize: 2, numberOfRunners: 0 }, + { poolSize: 4, numberOfRunners: 2, numberOfQueuedJobs: 6 }, + { poolSize: 4, numberOfRunners: 2, numberOfQueuedJobs: 4 }, + { poolSize: 4, numberOfRunners: 1, numberOfQueuedJobs: 3 }, + { poolSize: 4, numberOfRunners: 0, numberOfQueuedJobs: 2 }, + { poolSize: 4, numberOfRunners: 0, numberOfQueuedJobs: 0 }, + ]; + + for (const { poolSize, numberOfRunners, numberOfQueuedJobs } of testCases) { + let message = numberOfRunners 
=== 0 ? 'Should not top up' : `Should top up with ${numberOfRunners} runners`; + message += ` when the maximum pool size is ${poolSize}, and there are 2 idle runners`; + if (numberOfQueuedJobs !== undefined) { + message += ` and ${numberOfQueuedJobs} queued jobs`; + } + + it(message, async () => { + if (numberOfQueuedJobs !== undefined) { + mockGetNumberOfQueuedJobs.mockReturnValueOnce(Promise.resolve(numberOfQueuedJobs)); + } + await expect(await adjust({ poolSize, dynamicPoolScalingEnabled: true })).resolves; + if (numberOfQueuedJobs === undefined) { + expect(mockGetNumberOfQueuedJobs).not.toHaveBeenCalled(); + } else { + expect(mockGetNumberOfQueuedJobs).toHaveBeenCalledTimes(1); + } + if (numberOfRunners === 0) { + expect(createRunners).not.toHaveBeenCalled(); + } else { + expect(createRunners).toHaveBeenCalledTimes(1); + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner: ORG, runnerType: 'Org' }), + expect.objectContaining({ numberOfRunners }), + expect.anything(), + ); + } + }); + } + }); + + describe('With Runner Type Repo', () => { + it('Should top up the repository runners pool', async () => { + const runnerOwner = `${ORG}/my-repo-1`; + process.env.RUNNER_OWNERS = runnerOwner; + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: false })).resolves; + expect(createRunners).toHaveBeenCalledTimes(1); + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), expect.objectContaining({ numberOfRunners: 1 }), expect.anything(), ); }); + + it('Should top up the repository runners pool dynamically', async () => { + const runnerOwner = `${ORG}/my-repo-1`; + process.env.RUNNER_OWNERS = runnerOwner; + mockGetNumberOfQueuedJobs.mockReturnValueOnce(Promise.resolve(3)); + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: true })).resolves; + expect(createRunners).toHaveBeenCalledTimes(1); + expect(createRunners).toHaveBeenCalledWith( + 
expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), + expect.objectContaining({ numberOfRunners: 1 }), + expect.anything(), + ); + }); + }); + + describe('With Multiple Runner Owners', () => { + it('Should top up pools for all runner owners', async () => { + const runnerOwners = [`${ORG}/my-repo-1`, `${ORG}/my-repo-2`]; + process.env.RUNNER_OWNERS = runnerOwners.join(','); + await expect(await adjust({ poolSize: 3, dynamicPoolScalingEnabled: false })).resolves; + expect(createRunners).toHaveBeenCalledTimes(2); + for (const runnerOwner of runnerOwners) { + expect(createRunners).toHaveBeenCalledWith( + expect.objectContaining({ runnerOwner, runnerType: 'Repo' }), + expect.objectContaining({ numberOfRunners: 1 }), + expect.anything(), + ); + } + }); }); }); + +describe('Test number of queued jobs retrieval.', () => { + let ghClient: Octokit; + + beforeEach(() => { + ghClient = new mocktokit(); + + mockOctokit.actions.listWorkflowRunsForRepo.mockImplementation(async ({ owner, repo }) => [ + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 1, + attempt_number: 1, + }, + { + repository: { + owner: { login: owner }, + name: repo, + }, + id: 2, + attempt_number: 1, + }, + ]); + + mockOctokit.actions.listJobsForWorkflowRunAttempt.mockImplementation(async () => [ + { + status: 'queued', + labels: LABELS, + }, + { + status: 'queued', + labels: LABELS, + }, + { + status: 'queued', + labels: [...LABELS, 'label3'], + }, + { + status: 'in_progress', + labels: LABELS, + }, + ]); + }); + + it('Should retrieve the number of queued jobs for the org', async () => { + // 2 repos x 2 workflow runs x 2 queued jobs with matching labels + await expect(getNumberOfQueuedJobs(ghClient, ORG, 'Org', LABELS.join(','))).resolves.toBe(8); + }); + + for (const githubRepo of githubReposAccessibleToInstallation) { + it(`Should retrieve the number of queued jobs for the repo ${githubRepo.name}`, async () => { + // 1 repo x 2 workflow runs x 2 queued jobs with matching 
labels + await expect( + getNumberOfQueuedJobs(ghClient, `${githubRepo.owner.login}/${githubRepo.name}`, 'Repo', LABELS.join(',')), + ).resolves.toBe(4); + }); + } +}); diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index 162a7d0f6d..41ddf0c827 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -1,16 +1,20 @@ import { Octokit } from '@octokit/rest'; +import { GetResponseDataTypeFromEndpointMethod } from '@octokit/types'; import { createChildLogger } from '@aws-github-runner/aws-powertools-util'; import yn from 'yn'; import { bootTimeExceeded, listEC2Runners } from '../aws/runners'; -import { RunnerList } from '../aws/runners.d'; +import { RunnerList, RunnerType } from '../aws/runners.d'; import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from '../github/auth'; import { createRunners, getGitHubEnterpriseApiUrl } from '../scale-runners/scale-up'; const logger = createChildLogger('pool'); +type Repository = GetResponseDataTypeFromEndpointMethod; + export interface PoolEvent { poolSize: number; + dynamicPoolScalingEnabled: boolean; } interface RunnerStatus { @@ -18,6 +22,14 @@ interface RunnerStatus { status: string; } +// TODO: Move this function to a common module - a very similar function is +// defined in ../../webhook/src/runners/dispatch.ts +function canRunJob(workflowJobLabels: string[], runnerLabels: string[]): boolean { + runnerLabels = runnerLabels.map((label) => label.toLowerCase()); + const matchLabels = workflowJobLabels.every((wl) => runnerLabels.includes(wl.toLowerCase())); + return workflowJobLabels.length !== 0 && matchLabels; +} + export async function adjust(event: PoolEvent): Promise { logger.info(`Checking current pool size against pool of size: ${event.poolSize}`); const runnerLabels = process.env.RUNNER_LABELS || ''; @@ -35,38 +47,68 @@ export async function adjust(event: PoolEvent): 
Promise { const launchTemplateName = process.env.LAUNCH_TEMPLATE_NAME; const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE; const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default - const runnerOwner = process.env.RUNNER_OWNER; + // RUNNER_OWNERS is a comma-split list of owners, which might be either org or repo owners + const runnerOwners = process.env.RUNNER_OWNERS.split(','); const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false }); - const onDemandFailoverOnError = process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS - ? (JSON.parse(process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS) as [string]) - : []; + const onDemandFailoverOnError: string[] = JSON.parse(process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS ?? '[]'); const { ghesApiUrl, ghesBaseUrl } = getGitHubEnterpriseApiUrl(); - const installationId = await getInstallationId(ghesApiUrl, runnerOwner); - const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); - const githubInstallationClient = await createOctokitClient(ghAuth.token, ghesApiUrl); - - // Get statusses of runners registed in GitHub - const runnerStatusses = await getGitHubRegisteredRunnnerStatusses( - githubInstallationClient, - runnerOwner, - runnerNamePrefix, - ); - - // Look up the managed ec2 runners in AWS, but running does not mean idle - const ec2runners = await listEC2Runners({ - environment, - runnerOwner, - runnerType: 'Org', - statuses: ['running'], - }); - - const numberOfRunnersInPool = calculatePooSize(ec2runners, runnerStatusses); - const topUp = event.poolSize - numberOfRunnersInPool; - - if (topUp > 0) { + for (const runnerOwner of runnerOwners) { + logger.info(`Checking ${runnerOwner}`); + + const [owner, repo] = runnerOwner.split('/'); + const runnerType = repo === undefined ? 
'Org' : 'Repo'; + + const installationId = await getInstallationId(ghesApiUrl, owner); + const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); + const githubInstallationClient = await createOctokitClient(ghAuth.token, ghesApiUrl); + + // Get statuses of runners registered in GitHub + const runnerStatusses = await getGitHubRegisteredRunnnerStatusses( + githubInstallationClient, + runnerOwner, + runnerType, + runnerNamePrefix, + ); + + // Look up the managed ec2 runners in AWS, but running does not mean idle + const ec2runners = await listEC2Runners({ + environment, + runnerOwner, + runnerType, + statuses: ['running'], + }); + + if (event.poolSize <= 0) { + logger.error(`Invalid pool size: ${event.poolSize}`); + return; + } + + const currentPoolSize = calculateCurrentPoolSize(ec2runners, runnerStatusses); + + if (currentPoolSize >= event.poolSize) { + logger.info(`Pool will not be topped up. Found ${currentPoolSize} managed idle runners.`); + continue; + } + + const targetPoolSize = await calculateTargetPoolSize( + githubInstallationClient, + runnerOwner, + runnerType, + runnerLabels, + event.poolSize, + event.dynamicPoolScalingEnabled, + ); + + if (currentPoolSize >= targetPoolSize) { + logger.info(`Pool will not be topped up. Found ${currentPoolSize} managed idle runners.`); + continue; + } + + const topUp = targetPoolSize - currentPoolSize; + logger.info(`The pool will be topped up with ${topUp} runners.`); await createRunners( { @@ -77,7 +119,7 @@ export async function adjust(event: PoolEvent): Promise { runnerGroup, runnerOwner, runnerNamePrefix, - runnerType: 'Org', + runnerType, disableAutoUpdate: disableAutoUpdate, ssmTokenPath, ssmConfigPath, @@ -99,8 +141,6 @@ export async function adjust(event: PoolEvent): Promise { }, githubInstallationClient, ); - } else { - logger.info(`Pool will not be topped up. 
Found ${numberOfRunnersInPool} managed idle runners.`); } } @@ -115,7 +155,7 @@ async function getInstallationId(ghesApiUrl: string, org: string): Promise): number { +function calculateCurrentPoolSize(ec2runners: RunnerList[], runnerStatus: Map): number { // Runner should be considered idle if it is still booting, or is idle in GitHub let numberOfRunnersInPool = 0; for (const ec2Instance of ec2runners) { @@ -139,15 +179,91 @@ function calculatePooSize(ec2runners: RunnerList[], runnerStatus: Map { + if (!dynamicPoolScalingEnabled) { + return poolSize; + } + + // This call is made on the exports object to enable mocking it in tests + const numberOfQueuedJobs = await exports.getNumberOfQueuedJobs(ghClient, runnerOwner, runnerType, runnerLabels); + + return Math.min(poolSize, numberOfQueuedJobs); +} + +// This function is exported for testing purposes only +export async function getNumberOfQueuedJobs( + ghClient: Octokit, + runnerOwner: string, + runnerType: RunnerType, + runnerLabels: string, +): Promise { + logger.info('Checking for queued jobs to determine pool size'); + const [owner, repo] = runnerOwner.split('/'); + let repos; + if (runnerType === 'Repo') { + repos = [repo]; + } else { + // @ts-expect-error The types normalized by paginate are not correct, + // because they only flatten .data, while in case of listReposAccessibleToInstallation, + // they should flatten .repositories. 
+ const reposAccessibleToInstallation = (await ghClient.paginate(ghClient.apps.listReposAccessibleToInstallation, { + per_page: 100, + })) as Repository[]; + repos = reposAccessibleToInstallation.filter((repo) => repo.owner.login === owner).map((repo) => repo.name); + } + const queuedWorkflowRuns = []; + for (const repo of repos) { + const workflowRuns = await ghClient.paginate(ghClient.actions.listWorkflowRunsForRepo, { + owner, + repo, + status: 'queued', + per_page: 100, + }); + queuedWorkflowRuns.push(...workflowRuns); + } + const queuedJobs = []; + for (const workflowRun of queuedWorkflowRuns) { + const jobs = await ghClient.paginate(ghClient.actions.listJobsForWorkflowRunAttempt, { + owner: workflowRun.repository.owner.login, + repo: workflowRun.repository.name, + run_id: workflowRun.id, + attempt_number: workflowRun.run_attempt || 1, + per_page: 100, + }); + queuedJobs.push(...jobs.filter((job) => job.status === 'queued')); + } + const numberOfQueuedJobs = queuedJobs.filter((job) => canRunJob(job.labels, runnerLabels.split(','))).length; + logger.info(`Found ${numberOfQueuedJobs} queued jobs`); + return numberOfQueuedJobs; +} + async function getGitHubRegisteredRunnnerStatusses( ghClient: Octokit, runnerOwner: string, + runnerType: RunnerType, runnerNamePrefix: string, ): Promise> { - const runners = await ghClient.paginate(ghClient.actions.listSelfHostedRunnersForOrg, { - org: runnerOwner, - per_page: 100, - }); + let runners; + if (runnerType === 'Repo') { + const [owner, repo] = runnerOwner.split('/'); + runners = await ghClient.paginate(ghClient.actions.listSelfHostedRunnersForRepo, { + owner, + repo, + per_page: 100, + }); + } else { + runners = await ghClient.paginate(ghClient.actions.listSelfHostedRunnersForOrg, { + org: runnerOwner, + per_page: 100, + }); + } const runnerStatus = new Map(); for (const runner of runners) { runner.name = runnerNamePrefix ? 
runner.name.replace(runnerNamePrefix, '') : runner.name; diff --git a/main.tf b/main.tf index e4e18000fd..1ebbb2262b 100644 --- a/main.tf +++ b/main.tf @@ -265,7 +265,7 @@ module "runners" { pool_config = var.pool_config pool_lambda_memory_size = var.pool_lambda_memory_size pool_lambda_timeout = var.pool_lambda_timeout - pool_runner_owner = var.pool_runner_owner + pool_runner_owners = var.pool_runner_owners pool_lambda_reserved_concurrent_executions = var.pool_lambda_reserved_concurrent_executions ssm_housekeeper = var.runners_ssm_housekeeper diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index 771f7e1f98..8e4cb714eb 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -148,7 +148,7 @@ module "multi-runner" { | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | -| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | +| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`."
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registration. JIT configuration will only be applied for ephemeral runners. By default JIT configuration is enabled for ephemeral runners and can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owners: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case your own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` disables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config: "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without triggering a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the instances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the rate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owners = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 8fe23d506d..96ed48fb0a 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -110,7 +110,7 @@ module "runners" { pool_config = each.value.runner_config.pool_config pool_lambda_timeout = var.pool_lambda_timeout - pool_runner_owner = each.value.runner_config.pool_runner_owner + pool_runner_owners = each.value.runner_config.pool_runner_owners pool_lambda_reserved_concurrent_executions = var.pool_lambda_reserved_concurrent_executions associate_public_ipv4_address = var.associate_public_ipv4_address diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index bc45ee4057..64b1594576 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -58,7 +58,7 @@ variable "multi_runner_config" { instance_types = list(string) job_queue_retention_in_seconds = optional(number, 86400) minimum_running_time_in_minutes = optional(number, null) - pool_runner_owner = optional(string, null) + pool_runner_owners = optional(string, null) runner_as_root = optional(bool, false) runner_boot_time_in_minutes = optional(number, 5) runner_disable_default_labels = optional(bool, false) @@ -110,6 +110,7 @@ variable "multi_runner_config" { volume_size = 30 }]) pool_config = optional(list(object({ + dynamic_pool_scaling_enabled = optional(bool, false) schedule_expression = string 
schedule_expression_timezone = optional(string) size = number @@ -162,7 +163,7 @@ variable "multi_runner_config" { instance_types: "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux and Windows Server Core for win)." job_queue_retention_in_seconds: "The number of seconds the job is held in the queue before it is purged" minimum_running_time_in_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy." - pool_runner_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported." + pool_runner_owners: "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." runner_additional_security_group_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi_runner_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi_runner_config, the additional security group(s) will be applied to the individual runner." runner_as_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored." runner_boot_time_in_minutes: "The minimum time for an EC2 runner to boot and register as a runner." @@ -191,7 +192,7 @@ variable "multi_runner_config" { runner_log_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details." block_device_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`." job_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. 
The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app." - pool_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)." + pool_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)." } matcherConfig: { labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`" diff --git a/modules/runners/README.md b/modules/runners/README.md index 3127c68e29..c7802633e6 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -188,11 +188,11 @@ yarn run dist | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | | [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated if non busy. If not set the default is calculated based on the OS. | `number` | `null` | no | | [overrides](#input\_overrides) | This map provides the possibility to override some defaults. The following attributes are supported: `name_sg` overrides the `Name` tag for all security groups created by this module. `name_runner_agent_instance` overrides the `Name` tag for the ec2 instance defined in the auto launch configuration. `name_docker_machine_runners` overrides the `Name` tag spot instances created by the runner agent. | `map(string)` |
{
"name_runner": "",
"name_sg": ""
}
| no | -| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC). |
list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | +| [pool\_config](#input\_pool\_config) | The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false). |
list(object({
dynamic_pool_scaling_enabled = optional(bool, false)
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
}))
| `[]` | no | | [pool\_lambda\_memory\_size](#input\_pool\_lambda\_memory\_size) | Lambda Memory size limit in MB for pool lambda | `number` | `512` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | -| [pool\_runner\_owner](#input\_pool\_runner\_owner) | The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported. | `string` | `null` | no | +| [pool\_runner\_owners](#input\_pool\_runner\_owners) | The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma. | `string` | `null` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | | [role\_path](#input\_role\_path) | The path that will be added to the role; if not set, the prefix will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. 
| `string` | `null` | no | diff --git a/modules/runners/pool.tf b/modules/runners/pool.tf index 16a13aeccd..0a17d5a0ff 100644 --- a/modules/runners/pool.tf +++ b/modules/runners/pool.tf @@ -46,7 +46,7 @@ module "pool" { launch_template = aws_launch_template.runner group_name = var.runner_group_name name_prefix = var.runner_name_prefix - pool_owner = var.pool_runner_owner + pool_owners = var.pool_runner_owners role = aws_iam_role.runner } subnet_ids = var.subnet_ids diff --git a/modules/runners/pool/README.md b/modules/runners/pool/README.md index cffad1213a..221760f568 100644 --- a/modules/runners/pool/README.md +++ b/modules/runners/pool/README.md @@ -48,7 +48,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [aws\_partition](#input\_aws\_partition) | (optional) partition for the arn if not 'aws' | `string` | `"aws"` | no | -| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owner = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | +| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owners = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
dynamic_pool_scaling_enabled = bool
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | | [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | ## Outputs diff --git a/modules/runners/pool/main.tf b/modules/runners/pool/main.tf index 364d315439..e8c658863f 100644 --- a/modules/runners/pool/main.tf +++ b/modules/runners/pool/main.tf @@ -38,7 +38,7 @@ resource "aws_lambda_function" "pool" { RUNNER_LABELS = lower(join(",", var.config.runner.labels)) RUNNER_GROUP_NAME = var.config.runner.group_name RUNNER_NAME_PREFIX = var.config.runner.name_prefix - RUNNER_OWNER = var.config.runner.pool_owner + RUNNER_OWNERS = var.config.runner.pool_owners SSM_TOKEN_PATH = var.config.ssm_token_path SSM_CONFIG_PATH = var.config.ssm_config_path SUBNET_IDS = join(",", var.config.subnet_ids) @@ -214,7 +214,8 @@ resource "aws_scheduler_schedule" "pool" { arn = aws_lambda_function.pool.arn role_arn = aws_iam_role.scheduler.arn input = jsonencode({ - poolSize = each.value.size + poolSize = each.value.size + dynamic_pool_scaling_enabled = each.value.dynamic_pool_scaling_enabled }) } } diff --git a/modules/runners/pool/variables.tf b/modules/runners/pool/variables.tf index baf9746bbb..4f28544533 100644 --- a/modules/runners/pool/variables.tf +++ b/modules/runners/pool/variables.tf @@ -39,7 +39,7 @@ variable "config" { }) group_name = string name_prefix = string - pool_owner = string + pool_owners = string role = object({ arn = string }) @@ -50,6 +50,7 @@ variable "config" { instance_max_spot_price = string prefix = string pool = list(object({ + dynamic_pool_scaling_enabled = bool schedule_expression = string schedule_expression_timezone = string size = number @@ -83,5 +84,3 @@ variable "tracing_config" { }) default = {} } - - diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 08109008fd..61700b0c47 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -544,8 +544,8 @@ variable "pool_lambda_memory_size" { default = 512 } -variable "pool_runner_owner" { - description = "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the 
runners deployed. Repo level is not supported." +variable "pool_runner_owners" { + description = "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." type = string default = null } @@ -557,8 +557,9 @@ variable "pool_lambda_reserved_concurrent_executions" { } variable "pool_config" { - description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone ` to override the schedule time zone (defaults to UTC)." + description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)." type = list(object({ + dynamic_pool_scaling_enabled = optional(bool, false) schedule_expression = string schedule_expression_timezone = optional(string) size = number diff --git a/variables.tf b/variables.tf index eb3fbb7604..936205b6cc 100644 --- a/variables.tf +++ b/variables.tf @@ -690,8 +690,8 @@ variable "pool_lambda_timeout" { default = 60 } -variable "pool_runner_owner" { - description = "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
+variable "pool_runner_owners" { + description = "The pool will deploy runners to the GitHub org/repo ID(s), set this value to the org/repo(s) to which you want the runners deployed. Separate the entries by a comma." type = string default = null } @@ -703,8 +703,9 @@ variable "pool_lambda_reserved_concurrent_executions" { } variable "pool_config" { - description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)." + description = "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for weekdays to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC). Experimental! Use `dynamic_pool_scaling_enabled` to enable scaling the pool dynamically, up to the `pool_size`, based on the number of queued jobs (defaults to false)." type = list(object({ + dynamic_pool_scaling_enabled = optional(bool, false) schedule_expression = string schedule_expression_timezone = optional(string) size = number