github-aws-runners · npalm · Apr 8, 2025 · Apr 8, 2025
@@ -126,6 +126,7 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh)
 | <a name="input_disable_runner_autoupdate"></a> [disable\_runner\_autoupdate](#input\_disable\_runner\_autoupdate) | Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/) | `bool` | `false` | no |
 | <a name="input_enable_ami_housekeeper"></a> [enable\_ami\_housekeeper](#input\_enable\_ami\_housekeeper) | Option to disable the lambda to clean up old AMIs. | `bool` | `false` | no |
 | <a name="input_enable_cloudwatch_agent"></a> [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enables the cloudwatch agent on the ec2 runner instances. The runner uses a default config that can be overridden via `cloudwatch_config`. | `bool` | `true` | no |
+| <a name="input_enable_dynamic_ec2_types"></a> [enable\_dynamic\_ec2\_types](#input\_enable\_dynamic\_ec2\_types) | Enable dynamic EC2 instance types based on workflow job labels. When enabled, jobs can request a specific instance type via a label made of the 'ghr-ec2-' prefix followed by the instance type (e.g., 'ghr-ec2-t3.large'). | `bool` | `false` | no |
 | <a name="input_enable_ephemeral_runners"></a> [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners, runners will only be used once. | `bool` | `false` | no |
 | <a name="input_enable_jit_config"></a> [enable\_jit\_config](#input\_enable\_jit\_config) | Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI. | `bool` | `null` | no |
 | <a name="input_enable_job_queued_check"></a> [enable\_job\_queued\_check](#input\_enable\_job\_queued\_check) | Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior. | `bool` | `null` | no |

@@ -31,6 +31,7 @@ export interface ActionRequestMessage {
   installationId: number;
   repoOwnerType: string;
   retryCounter?: number;
+  labels?: string[];
 }
 
 export interface ActionRequestMessageRetry extends ActionRequestMessage {
@@ -212,25 +213,59 @@ export async function createRunners(
   githubRunnerConfig: CreateGitHubRunnerConfig,
   ec2RunnerConfig: CreateEC2RunnerConfig,
   ghClient: Octokit,
+  requestedInstanceType?: string,
 ): Promise<void> {
   const instances = await createRunner({
+    environment: ec2RunnerConfig.environment,
     runnerType: githubRunnerConfig.runnerType,
     runnerOwner: githubRunnerConfig.runnerOwner,
-    numberOfRunners: 1,
-    ...ec2RunnerConfig,
+    launchTemplateName: ec2RunnerConfig.launchTemplateName,
+    ec2instanceCriteria: requestedInstanceType
+      ? {
+          instanceTypes: [requestedInstanceType],
+          maxSpotPrice: ec2RunnerConfig.ec2instanceCriteria.maxSpotPrice,
+          instanceAllocationStrategy: ec2RunnerConfig.ec2instanceCriteria.instanceAllocationStrategy,
+          targetCapacityType: ec2RunnerConfig.ec2instanceCriteria.targetCapacityType,
+        }
+      : ec2RunnerConfig.ec2instanceCriteria,
+    subnets: ec2RunnerConfig.subnets,
+    numberOfRunners: ec2RunnerConfig.numberOfRunners ?? 1,
+    amiIdSsmParameterName: ec2RunnerConfig.amiIdSsmParameterName,
+    tracingEnabled: ec2RunnerConfig.tracingEnabled,
+    onDemandFailoverOnError: ec2RunnerConfig.onDemandFailoverOnError,
   });
   if (instances.length !== 0) {
     await createStartRunnerConfig(githubRunnerConfig, instances, ghClient);
   }
 }
 
 export async function scaleUp(eventSource: string, payload: ActionRequestMessage): Promise<void> {
-  logger.info(`Received ${payload.eventType} from ${payload.repositoryOwner}/${payload.repositoryName}`);
-
+  logger.debug(`Received event`, { payload });
   if (eventSource !== 'aws:sqs') throw Error('Cannot handle non-SQS events!');
+
+  const dynamicEc2TypesEnabled = yn(process.env.ENABLE_DYNAMIC_EC2_TYPES, { default: false });
+  const requestedInstanceType = payload.labels?.find(label => label.startsWith('ghr-ec2-'))?.replace('ghr-ec2-', '');
+
+  if (dynamicEc2TypesEnabled && requestedInstanceType) {
+    logger.info(`Dynamic EC2 instance type requested: ${requestedInstanceType}`);
+  }
+
+  // Store the requested instance type for use in createRunners
+  const ec2Config = {
+    ...payload,
+    requestedInstanceType: dynamicEc2TypesEnabled ? requestedInstanceType : undefined,
+  };
   const enableOrgLevel = yn(process.env.ENABLE_ORGANIZATION_RUNNERS, { default: true });
   const maximumRunners = parseInt(process.env.RUNNERS_MAXIMUM_COUNT || '3');
-  const runnerLabels = process.env.RUNNER_LABELS || '';
+
+  // Combine configured runner labels with dynamic EC2 instance type label if present
+  let runnerLabels = process.env.RUNNER_LABELS || '';
+  if (dynamicEc2TypesEnabled && requestedInstanceType) {
+    const ec2Label = `ghr-ec2-${requestedInstanceType}`;
+    runnerLabels = runnerLabels ? `${runnerLabels},${ec2Label}` : ec2Label;
+    logger.debug(`Added dynamic EC2 instance type label: ${ec2Label} to runner config.`);
+  }
+
   const runnerGroup = process.env.RUNNER_GROUP_NAME || 'Default';
   const environment = process.env.ENVIRONMENT;
   const ssmTokenPath = process.env.SSM_TOKEN_PATH;
@@ -337,6 +372,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
           onDemandFailoverOnError,
         },
         githubInstallationClient,
+        ec2Config.requestedInstanceType,
       );
 
       await publishRetryMessage(payload);

@@ -45,6 +45,7 @@ async function handleWorkflowJob(
           installationId: body.installation?.id ?? 0,
           queueId: queue.id,
           repoOwnerType: body.repository.owner.type,
+          labels: body.workflow_job.labels,
         });
         logger.info(`Successfully dispatched job for ${body.repository.full_name} to the queue ${queue.id}`);
         return {
@@ -70,13 +71,16 @@ export function canRunJob(
   runnerLabelsMatchers: string[][],
   workflowLabelCheckAll: boolean,
 ): boolean {
+  // Filter out ghr-ec2- labels as they are handled by the dynamic EC2 instance type feature
+  const filteredLabels = workflowJobLabels.filter(label => !label.startsWith('ghr-ec2-'));
+
   runnerLabelsMatchers = runnerLabelsMatchers.map((runnerLabel) => {
     return runnerLabel.map((label) => label.toLowerCase());
   });
   const matchLabels = workflowLabelCheckAll
-    ? runnerLabelsMatchers.some((rl) => workflowJobLabels.every((wl) => rl.includes(wl.toLowerCase())))
-    : runnerLabelsMatchers.some((rl) => workflowJobLabels.some((wl) => rl.includes(wl.toLowerCase())));
-  const match = workflowJobLabels.length === 0 ? !matchLabels : matchLabels;
+    ? runnerLabelsMatchers.some((rl) => filteredLabels.every((wl) => rl.includes(wl.toLowerCase())))
+    : runnerLabelsMatchers.some((rl) => filteredLabels.some((wl) => rl.includes(wl.toLowerCase())));
+  const match = filteredLabels.length === 0 ? !matchLabels : matchLabels;
 
   logger.debug(
     `Received workflow job event with labels: '${JSON.stringify(workflowJobLabels)}'. The event does ${

@@ -6,12 +6,13 @@ const logger = createChildLogger('sqs');
 
 export interface ActionRequestMessage {
   id: number;
-  eventType: string;
   repositoryName: string;
   repositoryOwner: string;
+  eventType: string;
   installationId: number;
   queueId: string;
   repoOwnerType: string;
+  labels?: string[];
 }
 
 export interface MatcherConfig {

@@ -187,8 +187,9 @@ module "runners" {
   github_app_parameters                = local.github_app_parameters
   enable_organization_runners          = var.enable_organization_runners
   enable_ephemeral_runners             = var.enable_ephemeral_runners
-  enable_jit_config                    = var.enable_jit_config
+  enable_dynamic_ec2_types             = var.enable_dynamic_ec2_types
   enable_job_queued_check              = var.enable_job_queued_check
+  enable_jit_config                    = var.enable_jit_config
   enable_on_demand_failover_for_errors = var.enable_runner_on_demand_failover_for_errors
   disable_runner_autoupdate            = var.disable_runner_autoupdate
   enable_managed_runner_security_group = var.enable_managed_runner_security_group

@@ -75,6 +75,7 @@ variable "multi_runner_config" {
       disable_runner_autoupdate               = optional(bool, false)
       ebs_optimized                           = optional(bool, false)
       enable_ephemeral_runners                = optional(bool, false)
+      enable_dynamic_ec2_types                = optional(bool, false)
       enable_job_queued_check                 = optional(bool, null)
       enable_on_demand_failover_for_errors    = optional(list(string), [])
       enable_organization_runners             = optional(bool, false)
@@ -179,7 +180,8 @@ variable "multi_runner_config" {
         disable_runner_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
         ebs_optimized: "The EC2 EBS optimized configuration."
         enable_ephemeral_runners: "Enable ephemeral runners, runners will only be used once."
-        enable_job_queued_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
+        enable_dynamic_ec2_types: "Enable dynamic EC2 instance types based on workflow job labels. When enabled, jobs can request a specific instance type via a label made of the 'ghr-ec2-' prefix followed by the instance type (e.g., 'ghr-ec2-t3.large')."
+        enable_job_queued_check: "(Optional) Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior."
         enable_on_demand_failover_for_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
         enable_organization_runners: "Register runners to organization, instead of repo level"
         enable_runner_binaries_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."

@@ -147,6 +147,7 @@ yarn run dist
 | <a name="input_ebs_optimized"></a> [ebs\_optimized](#input\_ebs\_optimized) | The EC2 EBS optimized configuration. | `bool` | `false` | no |
 | <a name="input_egress_rules"></a> [egress\_rules](#input\_egress\_rules) | List of egress rules for the GitHub runner instances. | <pre>list(object({<br/>    cidr_blocks      = list(string)<br/>    ipv6_cidr_blocks = list(string)<br/>    prefix_list_ids  = list(string)<br/>    from_port        = number<br/>    protocol         = string<br/>    security_groups  = list(string)<br/>    self             = bool<br/>    to_port          = number<br/>    description      = string<br/>  }))</pre> | <pre>[<br/>  {<br/>    "cidr_blocks": [<br/>      "0.0.0.0/0"<br/>    ],<br/>    "description": null,<br/>    "from_port": 0,<br/>    "ipv6_cidr_blocks": [<br/>      "::/0"<br/>    ],<br/>    "prefix_list_ids": null,<br/>    "protocol": "-1",<br/>    "security_groups": null,<br/>    "self": null,<br/>    "to_port": 0<br/>  }<br/>]</pre> | no |
 | <a name="input_enable_cloudwatch_agent"></a> [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`. | `bool` | `true` | no |
+| <a name="input_enable_dynamic_ec2_types"></a> [enable\_dynamic\_ec2\_types](#input\_enable\_dynamic\_ec2\_types) | Enable dynamic EC2 instance types based on workflow job labels. When enabled, jobs can request a specific instance type via a label made of the 'ghr-ec2-' prefix followed by the instance type (e.g., 'ghr-ec2-t3.large'). | `bool` | `false` | no |
 | <a name="input_enable_ephemeral_runners"></a> [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners, runners will only be used once. | `bool` | `false` | no |
 | <a name="input_enable_jit_config"></a> [enable\_jit\_config](#input\_enable\_jit\_config) | Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI. | `bool` | `null` | no |
 | <a name="input_enable_job_queued_check"></a> [enable\_job\_queued\_check](#input\_enable\_job\_queued\_check) | Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior. | `bool` | `null` | no |

@@ -28,6 +28,7 @@ resource "aws_lambda_function" "scale_up" {
       AMI_ID_SSM_PARAMETER_NAME                = var.ami_id_ssm_parameter_name
       DISABLE_RUNNER_AUTOUPDATE                = var.disable_runner_autoupdate
       ENABLE_EPHEMERAL_RUNNERS                 = var.enable_ephemeral_runners
+      ENABLE_DYNAMIC_EC2_TYPES                 = var.enable_dynamic_ec2_types
       ENABLE_JIT_CONFIG                        = var.enable_jit_config
       ENABLE_JOB_QUEUED_CHECK                  = local.enable_job_queued_check
       ENABLE_METRIC_GITHUB_APP_RATE_LIMIT      = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit

@@ -526,6 +526,12 @@ variable "enable_ephemeral_runners" {
   default     = false
 }
 
+variable "enable_dynamic_ec2_types" {
+  description = "Enable dynamic EC2 instance types based on workflow job labels. When enabled, jobs can request a specific instance type via a label made of the 'ghr-ec2-' prefix followed by the instance type (e.g., 'ghr-ec2-t3.large')."
+  type        = bool
+  default     = false
+}
+
 variable "enable_job_queued_check" {
   description = "Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior."
   type        = bool

@@ -647,6 +647,12 @@ variable "enable_ephemeral_runners" {
   default     = false
 }
 
+variable "enable_dynamic_ec2_types" {
+  description = "Enable dynamic EC2 instance types based on workflow job labels. When enabled, jobs can request a specific instance type via a label made of the 'ghr-ec2-' prefix followed by the instance type (e.g., 'ghr-ec2-t3.large')."
+  type        = bool
+  default     = false
+}
+
 variable "enable_job_queued_check" {
   description = "Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior."
   type        = bool