diff --git a/.github/workflows/cleanup-self-hosted-runners.yml b/.github/workflows/cleanup-self-hosted-runners.yml
new file mode 100644
index 00000000..4e16891c
--- /dev/null
+++ b/.github/workflows/cleanup-self-hosted-runners.yml
@@ -0,0 +1,76 @@
+name: Cleanup Azure self hosted runners
+run-name: Cleanup Azure self hosted runners
+
+on:
+  schedule:
+    # Run every 6 hours
+    - cron: "0 */6 * * *"
+  workflow_dispatch:
+
+# The following secrets are required for this workflow to run:
+# AZURE_CREDENTIALS - Credentials for the Azure CLI. It's recommended to set up a resource
+#                     group specifically for self-hosted Actions Runners.
+#   az ad sp create-for-rbac --name "{YOUR_DESCRIPTIVE_NAME_HERE}" --role contributor \
+#     --scopes /subscriptions/{SUBSCRIPTION_ID_HERE}/resourceGroups/{RESOURCE_GROUP_HERE} \
+#     --sdk-auth
+# AZURE_RESOURCE_GROUP - Resource group the runner VM(s) were created in
+# GH_APP_ID, GH_APP_PRIVATE_KEY - Credentials of the GitHub App that gh-cli-auth-as-app.sh
+#                     uses to authenticate the GitHub CLI
+jobs:
+  delete-runner:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Azure Login
+        uses: azure/login@v2
+        with:
+          creds: ${{ secrets.AZURE_CREDENTIALS }}
+      - name: Discover VMs to delete
+        uses: azure/CLI@v2
+        env:
+          GH_APP_ID: ${{ secrets.GH_APP_ID }}
+          GH_APP_PRIVATE_KEY: ${{ secrets.GH_APP_PRIVATE_KEY }}
+        with:
+          # Stick to 2.63.0 until jq is added to 2.64.0+ https://github.com/Azure/azure-cli/issues/29830
+          azcliversion: 2.63.0
+          inlineScript: |
+            # One compact JSON object ({name,timeCreated}) per VM in the resource group
+            active_vms=$(az vm list -g ${{ secrets.AZURE_RESOURCE_GROUP }} | jq -c '.[] | {name,timeCreated}')
+            current_time=$(date +%s)
+            one_hour_ago=$(($current_time - 3600))
+
+            if [ -z "$active_vms" ]; then
+              echo "No active VMs found, nothing to do."
+              exit 0
+            else
+              echo "Found these active VMs:"
+              echo $active_vms
+            fi
+
+            for active_vm in ${active_vms[@]}; do
+              # -r strips the JSON quotes so the bare name can be passed to az and gh
+              vm_name=$(echo $active_vm | jq -r '.name')
+              # Use jq to extract and format the date-time string
+              vm_creation_time_string="$(echo $active_vm |
+                jq -r '.timeCreated | sub("\\.[0-9]+[+-][0-9]+:[0-9]+$"; "") | sub("T"; " ")')"
+              vm_creation_time=$(TZ=UTC date -d "$vm_creation_time_string" +%s)
+
+              # Skip VMs younger than one hour and VMs whose runner GitHub reports as
+              # busy; delete everything else together with its network resources.
+              if [ "$one_hour_ago" -lt "$vm_creation_time" ]; then
+                echo "::notice::The VM ${vm_name} was created less than 1 hour ago and shouldn't be deleted yet. Skipping."
+              elif test true = "$(if test ! -f .cli-authenticated; then
+                ./gh-cli-auth-as-app.sh &&
+                >.cli-authenticated # only authenticate once
+              fi &&
+              gh api repos/$GITHUB_REPOSITORY/actions/runners \
+                --jq '.runners[] | select(.name == "'$vm_name'") | .busy')"; then
+                echo "::notice::The VM ${vm_name} is still busy."
+              else
+                echo "::warning::The VM ${vm_name} was created more than 1 hour ago and wasn't deleted. Let's do that now."
+                az vm delete -n "$vm_name" -g ${{ secrets.AZURE_RESOURCE_GROUP }} --yes
+                az network nsg delete -n "$vm_name"-nsg -g ${{ secrets.AZURE_RESOURCE_GROUP }}
+                az network vnet delete -n "$vm_name"-vnet -g ${{ secrets.AZURE_RESOURCE_GROUP }}
+                az network public-ip delete -n "$vm_name"-ip -g ${{ secrets.AZURE_RESOURCE_GROUP }}
+              fi
+            done
diff --git a/gh-cli-auth-as-app.sh b/gh-cli-auth-as-app.sh
new file mode 100755
index 00000000..5641f982
--- /dev/null
+++ b/gh-cli-auth-as-app.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# Log the GitHub CLI in as the GitHub App given by GH_APP_ID/GH_APP_PRIVATE_KEY:
+# obtain a token for GITHUB_REPOSITORY through the sibling
+# get-app-installation-id/get-installation-access-token modules and pipe it
+# to `gh auth login --with-token`.
+
+node -e '(async () => {
+  const [owner, repo] = process.env.GITHUB_REPOSITORY.split("/")
+  const getAppInstallationId = require("./get-app-installation-id")
+  const installationId = await getAppInstallationId(
+    console,
+    process.env.GH_APP_ID,
+    process.env.GH_APP_PRIVATE_KEY,
+    owner,
+    repo
+  )
+  const getInstallationAccessToken = require("./get-installation-access-token")
+  const token = await getInstallationAccessToken(
+    console,
+    process.env.GH_APP_ID,
+    process.env.GH_APP_PRIVATE_KEY,
+    installationId
+  )
+  // Mask the token in the workflow log before handing it to gh via stdout
+  process.stderr.write(`::add-mask::${token.token}\n`)
+  process.stdout.write(token.token)
+})().catch(e => {
+  // Error properties are not enumerable, so JSON.stringify(e) alone prints {}
+  process.stderr.write(`${(e && e.stack) || JSON.stringify(e, null, 2)}\n`)
+  process.exit(1)
+})' | gh auth login --with-token