Skip to content

Commit d6323ff

Browse files
committed
rebuild the AKS build cluster
1 parent cd53803 commit d6323ff

25 files changed

Lines changed: 679 additions & 52 deletions

File tree

infra/azure/terraform/k8s-infra-prow-build/aks.tf

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,23 @@ limitations under the License.
1515
*/
1616

1717
module "prow_build" {
18-
source = "Azure/aks/azurerm"
19-
version = "9.2.0"
18+
source = "Azure/aks/azurerm//v4"
19+
version = "10.0.0"
2020
resource_group_name = azurerm_resource_group.rg.name
2121
location = azurerm_resource_group.rg.location
2222
sku_tier = "Standard"
23-
automatic_channel_upgrade = "patch"
24-
kubernetes_version = "1.32"
25-
prefix = "k8s-infra"
23+
automatic_channel_upgrade = "stable"
24+
# kubernetes_version = "1.34"
25+
prefix = "k8s-infra"
2626

2727
role_based_access_control_enabled = true
2828
workload_identity_enabled = true
2929
oidc_issuer_enabled = true
30-
rbac_aad = true
31-
rbac_aad_managed = true
32-
local_account_disabled = false
30+
rbac_aad_azure_rbac_enabled = true
31+
rbac_aad_admin_group_object_ids = [
32+
"2d1bde94-76f6-4538-9ac0-1b2ef459ba15" # aks-admins
33+
]
34+
local_account_disabled = false
3335

3436
identity_type = "UserAssigned"
3537
identity_ids = [azurerm_user_assigned_identity.aks_identity.id]
@@ -42,10 +44,13 @@ module "prow_build" {
4244
user_assigned_identity_id = azurerm_user_assigned_identity.aks_kubelet_identity.id
4345
}
4446

45-
ebpf_data_plane = "cilium"
46-
network_plugin_mode = "overlay"
47-
network_plugin = "azure"
48-
network_policy = "cilium"
47+
ebpf_data_plane = "cilium"
48+
network_plugin_mode = "overlay"
49+
network_plugin = "azure"
50+
network_policy = "cilium"
51+
network_ip_versions = ["IPv4", "IPv6"]
52+
network_data_plane = "cilium"
53+
net_profile_pod_cidrs = ["10.244.0.0/16", "fd12:3456:789a::/64"]
4954

5055
enable_auto_scaling = true
5156
node_resource_group = "MC_${local.prefix}-prow-build-${azurerm_resource_group.rg.location}-aks-rg"
@@ -60,25 +65,47 @@ module "prow_build" {
6065
agents_max_pods = 110
6166
agents_type = "VirtualMachineScaleSets"
6267
agents_availability_zones = ["1", "3"]
63-
os_sku = "AzureLinux"
68+
os_sku = "Ubuntu"
6469
agents_size = "Standard_D4ds_v5"
6570
only_critical_addons_enabled = true
6671
temporary_name_for_rotation = "tmpnodepool1"
6772
agents_tags = var.common_tags
68-
vnet_subnet_id = module.prow_network.subnets.prow_build_aks.resource_id
73+
vnet_subnet = {
74+
id = module.prow_network.subnets.prow_build_aks.resource_id
75+
}
6976

7077
storage_profile_enabled = true
7178
storage_profile_blob_driver_enabled = false
7279
storage_profile_file_driver_enabled = false
7380

7481
node_pools = {
75-
pool1 = {
76-
name = "pool1"
77-
vm_size = "Standard_E8ds_v5"
82+
pool-amd64 = {
83+
name = "amd64"
84+
vm_size = "Standard_D8ads_v6"
85+
enable_auto_scaling = true
86+
kubelet_disk_type = "OS"
87+
min_count = 3
88+
max_count = 100
89+
max_pods = 110
90+
os_disk_type = "Ephemeral"
91+
os_disk_size_gb = 100
92+
os_sku = "Ubuntu"
93+
vnet_subnet_id = module.prow_network.subnets.prow_build_aks.resource_id
94+
95+
upgrade_settings = {
96+
max_surge = "33%"
97+
drain_timeout_in_minutes = 90
98+
node_soak_duration_in_minutes = 1
99+
}
100+
}
101+
pool-arm64 = {
102+
name = "arm64"
103+
vm_size = "Standard_D8pds_v6"
78104
enable_auto_scaling = true
79105
kubelet_disk_type = "OS"
80106
min_count = 3
81-
max_count = 200
107+
max_count = 100
108+
max_pods = 110
82109
os_disk_type = "Ephemeral"
83110
os_disk_size_gb = 100
84111
os_sku = "Ubuntu"

infra/azure/terraform/k8s-infra-prow-build/network.tf

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ limitations under the License.
1515
*/
1616

1717
module "prow_network" {
18-
source = "Azure/avm-res-network-virtualnetwork/azurerm"
19-
version = "0.6.0"
20-
name = "vnet-${azurerm_resource_group.rg.name}"
21-
resource_group_name = azurerm_resource_group.rg.name
22-
location = azurerm_resource_group.rg.location
23-
address_space = ["10.52.0.0/16"]
18+
source = "Azure/avm-res-network-virtualnetwork/azurerm"
19+
version = "0.16.0"
20+
name = "vnet-${azurerm_resource_group.rg.name}"
21+
parent_id = azurerm_resource_group.rg.id
22+
location = azurerm_resource_group.rg.location
23+
address_space = ["10.52.0.0/16", "fd00:d2cc:d945::/48"]
2424
subnets = {
2525
"prow_build_aks" = {
2626
name = "snet-${azurerm_resource_group.rg.name}"
27-
address_prefixes = ["10.52.1.0/24"]
27+
address_prefixes = ["10.52.0.0/22", "fd00:d2cc:d945:1::/64"]
2828
service_endpoints = ["Microsoft.Storage", "Microsoft.ContainerRegistry"]
2929
private_endpoint_network_policies_enabled = false
3030
}
@@ -35,13 +35,12 @@ module "prow_network" {
3535
}
3636

3737
module "private_dns_zones" {
38-
source = "Azure/avm-ptn-network-private-link-private-dns-zones/azurerm"
39-
version = "0.4.0"
40-
location = azurerm_resource_group.rg.location
41-
resource_group_name = azurerm_resource_group.rg.name
42-
resource_group_creation_enabled = false
43-
tags = var.common_tags
44-
enable_telemetry = false
38+
source = "Azure/avm-ptn-network-private-link-private-dns-zones/azurerm"
39+
version = "0.23.0"
40+
parent_id = azurerm_resource_group.rg.id
41+
location = azurerm_resource_group.rg.location
42+
tags = var.common_tags
43+
enable_telemetry = false
4544
private_link_private_dns_zones = {
4645
azure_aks_mgmt = {
4746
zone_name = "privatelink.{regionName}.azmk8s.io"
@@ -66,9 +65,9 @@ module "private_dns_zones" {
6665
}
6766
}
6867

69-
virtual_network_resource_ids_to_link_to = {
68+
virtual_network_link_additional_virtual_networks = {
7069
"vnet_prow_build_aks" = {
71-
vnet_resource_id = module.prow_network.resource_id
70+
virtual_network_resource_id = module.prow_network.resource_id
7271
}
7372
}
7473
}

infra/azure/terraform/k8s-infra-prow-build/providers.tf

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@ limitations under the License.
1515
*/
1616

1717
terraform {
18-
required_version = "~> 1.9.0"
18+
required_version = "~> 1.11.4"
1919
required_providers {
2020
azurerm = {
2121
source = "hashicorp/azurerm"
22-
version = ">= 3.51, < 4.0"
22+
version = ">= 4.57, < 5.0"
23+
}
24+
azapi = {
25+
source = "azure/azapi"
26+
version = ">= 2.8, < 3"
2327
}
2428
}
2529

infra/azure/terraform/k8s-infra-prow-build/rbac.tf

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,48 @@ resource "azurerm_role_assignment" "kubelet_mi_operator" {
3434
scope = azurerm_resource_group.rg.id
3535
principal_id = azurerm_user_assigned_identity.aks_kubelet_identity.principal_id
3636
}
37+
38+
// aks-admin managed identity
39+
resource "azurerm_user_assigned_identity" "aks_admin" {
  // User-assigned managed identity called "aks-admin". It is created in the
  // same resource group and region as the rest of this stack.
  name                = "aks-admin"
  resource_group_name = azurerm_resource_group.rg.name
  location            = azurerm_resource_group.rg.location
}
44+
45+
resource "azurerm_federated_identity_credential" "aks_admin_argocd" {
  // One federated credential per Argo CD service account. Each element is the
  // full Kubernetes subject ("system:serviceaccount:<namespace>:<name>") and is
  // used verbatim as the credential subject below.
  for_each = toset([
    "system:serviceaccount:argocd:argocd-application-controller",
    "system:serviceaccount:argocd:argocd-server",
  ])

  // Credential name = "argocd-" + the last ":"-separated field of the subject
  // (i.e. the service account name).
  name                = "argocd-${reverse(split(":", each.key))[0]}"
  parent_id           = azurerm_user_assigned_identity.aks_admin.id
  resource_group_name = azurerm_resource_group.rg.name

  // Tokens must be issued by the "utility" cluster in project k8s-infra-prow
  // (us-central1), as encoded in the issuer URL.
  issuer   = "https://container.googleapis.com/v1/projects/k8s-infra-prow/locations/us-central1/clusters/utility"
  subject  = each.key
  audience = ["api://AzureADTokenExchange"]
}
57+
58+
resource "azurerm_federated_identity_credential" "aks_admin_prow" {
  // One federated credential per Prow service account.
  // https://github.com/kubernetes/k8s.io/tree/main/kubernetes/gke-prow/prow
  // Every service that loads the kubeconfig should be listed here.
  for_each = toset([
    "system:serviceaccount:default:deck",
    "system:serviceaccount:default:config-bootstrapper",
    "system:serviceaccount:default:crier",
    "system:serviceaccount:default:sinker",
    "system:serviceaccount:default:prow-controller-manager",
    "system:serviceaccount:default:hook",
  ])

  // Credential name = "prow-" + the last ":"-separated field of the subject
  // (i.e. the service account name).
  name                = "prow-${reverse(split(":", each.key))[0]}"
  parent_id           = azurerm_user_assigned_identity.aks_admin.id
  resource_group_name = azurerm_resource_group.rg.name

  // Tokens must be issued by the "prow" cluster in project k8s-infra-prow
  // (us-central1), as encoded in the issuer URL.
  issuer   = "https://container.googleapis.com/v1/projects/k8s-infra-prow/locations/us-central1/clusters/prow"
  subject  = each.key
  audience = ["api://AzureADTokenExchange"]
}
76+
77+
resource "azurerm_role_assignment" "aks_admin" {
  // Grants the aks-admin managed identity cluster-admin rights, scoped to the
  // whole resource group.
  //
  // NOTE(review): "Azure Arc Kubernetes Cluster Admin" appears to be the
  // built-in role for Arc-connected clusters; for an AKS cluster using Azure
  // RBAC the intended role may be "Azure Kubernetes Service RBAC Cluster
  // Admin". Confirm before relying on this assignment.
  role_definition_name = "Azure Arc Kubernetes Cluster Admin"
  scope                = azurerm_resource_group.rg.id

  // The principal must be the aks_admin identity (the one the Argo CD / Prow
  // federated credentials are attached to). Previously this referenced the
  // kubelet identity, which handed cluster-admin to every node's kubelet
  // identity and left aks-admin without any role.
  principal_id = azurerm_user_assigned_identity.aks_admin.principal_id
}

infra/gcp/terraform/k8s-infra-prow-build/iam.tf

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ module "iam" {
3333
"serviceAccount:kubernetes-external-secrets@k8s-infra-prow-build.iam.gserviceaccount.com",
3434
"principal://iam.googleapis.com/projects/${module.project.project_number}/locations/global/workloadIdentityPools/${module.project.project_id}.svc.id.goog/subject/ns/external-secrets/sa/external-secrets",
3535
"principal://iam.googleapis.com/projects/180382678033/locations/global/workloadIdentityPools/k8s-infra-prow-build-trusted.svc.id.goog/subject/ns/external-secrets/sa/external-secrets",
36+
"principal://iam.googleapis.com/projects/16065310909/locations/global/workloadIdentityPools/k8s-infra-prow.svc.id.goog/subject/ns/external-secrets/sa/external-secrets",
3637
]
3738
}
3839
}
@@ -78,3 +79,29 @@ resource "google_iam_workload_identity_pool_provider" "eks_kops" {
7879
allowed_audiences = ["sts.googleapis.com"]
7980
}
8081
}
82+
83+
84+
resource "google_iam_workload_identity_pool" "aks_cluster" {
  # Workload identity pool "prow-aks", created in this stack's project.
  workload_identity_pool_id = "prow-aks"
  project                   = module.project.project_id

  description  = "Identity pool for CI on Azure using AKS clusters"
  display_name = "AKS Prow Cluster"
}
91+
92+
resource "google_iam_workload_identity_pool_provider" "aks_cluster" {
  # OIDC provider "oidc", registered inside the aks_cluster workload identity pool.
  workload_identity_pool_provider_id = "oidc"
  workload_identity_pool_id          = google_iam_workload_identity_pool.aks_cluster.workload_identity_pool_id
  project                            = module.project.project_id

  description  = "Identity pool for CI on Azure using AKS clusters"
  display_name = "AKS OIDC provider"

  # The token's `sub` claim is mapped to the Google subject.
  attribute_mapping = {
    "google.subject" = "assertion.sub"
  }

  oidc {
    # From AKS cluster created in https://github.com/kubernetes/k8s.io/tree/main/infra/azure/terraform/k8s-infra-prow-build
    allowed_audiences = ["sts.googleapis.com"]
    issuer_uri        = "https://eastus2.oic.prod-aks.azure.com/d1aa7522-0959-442e-80ee-8c4f7fb4c184/85d5aa19-bc3c-4cdb-bc17-0cf8703cfa3f"
  }
}

infra/gcp/terraform/k8s-infra-prow-build/serviceaccounts.tf

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ locals {
3030
project_roles = ["roles/secretmanager.secretAccessor"],
3131
cluster_namespace = "kubernetes-external-secrets"
3232
additional_workload_identity_principals = [
33-
"principalSet://iam.googleapis.com/${google_iam_workload_identity_pool.eks_cluster.name}/*"
33+
"principalSet://iam.googleapis.com/${google_iam_workload_identity_pool.eks_cluster.name}/*",
34+
"principalSet://iam.googleapis.com/${google_iam_workload_identity_pool.aks_cluster.name}/*"
3435
]
3536
}
3637
}

kubernetes/README.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
2+
# Kubernetes clusters owned by SIG K8s-Infra
3+
4+
This folder contains the declarative configuration for Kubernetes clusters managed by this repo.
5+
The general pattern is:
6+
7+
- Per-cluster configuration lives in `kubernetes/<cluster-name>/...`.
8+
- Shared workloads are defined as Argo CD Applications/ApplicationSets in `kubernetes/apps/`.
9+
- Argo CD itself runs in the `gke-utility` cluster (see `kubernetes/gke-utility/argocd/`).
10+
11+
We use Argo CD to manage our clusters. It is available at argo.k8s.io; to access it, you need to:
12+
- be a member of the Kubernetes GitHub org
13+
- add your github user to the AuthorizationPolicy in this file: `kubernetes/gke-utility/argocd/extras.yaml#L62`
14+
15+
## Clusters managed here
16+
17+
Cluster directories under `kubernetes/` correspond to the clusters Argo CD manages:
18+
19+
- `aks-prow-build` A Prow Build Cluster in AKS
20+
- `eks-prow-build` A Prow Build Cluster in EKS
21+
- `eks-prow-kops` A Prow Build Cluster in EKS
22+
- `gke-aaa` A shared GKE cluster that runs our applications
23+
- `gke-prow` Prow Control Plane Cluster on GKE
24+
- `gke-prow-build` A Prow Build Cluster in GKE
25+
- `gke-prow-build-trusted` A Prow Build Cluster in GKE, for trusted/sensitive jobs
26+
- `gke-utility` A GKE cluster running utility workloads such as ArgoCD, Atlantis, Unified Monitoring Stack, etc
27+
- `ibm-ppc64le` A Prow Build Cluster in IBM
28+
- `ibm-s390x` A Prow Build Cluster in IBM
29+
30+
Cluster registration/labels used by ApplicationSets are defined in `kubernetes/gke-utility/argocd/clusters.yaml`.
31+
32+
## Workloads
33+
34+
This repo manages many workloads; common examples include:
35+
36+
- `prow`: all Prow components, deployed in the `test-pods` namespace of every build cluster.
37+
- `datadog`: our monitoring and security tooling, deployed on all AKS/EKS/GKE clusters.
38+
39+
40+
### Note
41+
42+
- The `gke-aaa` Kubernetes manifests are not managed by Argo CD yet; you can find them in the `apps` folder.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Kustomization for the `datadog` namespace: inflates the Datadog Helm chart
# using values.yaml and adds the manifests listed under `resources`.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: datadog

resources:
  - secrets.yaml

helmCharts:
  - name: datadog
    releaseName: datadog
    repo: https://helm.datadoghq.com
    version: 3.157.0
    # Kubernetes version the chart is rendered against.
    kubeVersion: "1.33"
    valuesFile: values.yaml
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# ExternalSecret `datadog-secret`: reads the remote `datadog-secrets` entry
# through the `k8s-infra-prow-build` ClusterSecretStore.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: datadog-secret
spec:
  secretStoreRef:
    name: k8s-infra-prow-build
    kind: ClusterSecretStore
  dataFrom:
    - extract:
        key: datadog-secrets
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Helm values for the Datadog chart on the AKS Prow build cluster.
# NOTE(review): nesting below follows the Datadog chart's documented value
# paths; verify against the committed values.yaml.
registry: datadoghq.azurecr.io

datadog:
  site: us5.datadoghq.com
  clusterName: k8s-infra-aks-prow-build

  # API and app keys are both read from the `datadog-secret` Secret.
  apiKeyExistingSecret: datadog-secret
  appKeyExistingSecret: datadog-secret

  logs:
    enabled: true
    containerCollectAll: true

  prometheusScrape:
    enabled: true
    serviceEndpoints: true

  kubeStateMetricsCore:
    enabled: true

  networkMonitoring:
    enabled: true

  processAgent:
    enabled: true
    processCollection: true

  sbom:
    enabled: true
    containerImage:
      enabled: true
      uncompressedLayersSupport: true
    host:
      enabled: true

  apm:
    instrumentation:
      skipKPITelemetry: true # https://github.com/DataDog/helm-charts/issues/1395

clusterAgent:
  tokenExistingSecret: datadog-secret

agents:
  # datadog supports arm64, so the agents tolerate the arm64 architecture taint.
  tolerations:
    - key: kubernetes.io/arch
      operator: Equal
      value: arm64
      effect: NoSchedule

providers:
  aks:
    enabled: true

0 commit comments

Comments
 (0)