|
| 1 | +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. |
| 2 | +# All rights reserved. |
| 3 | +# |
| 4 | +# SPDX-License-Identifier: BSD-3-Clause |
| 5 | +# |
| 6 | +# This buildspec file defines the CI/CD pipeline for IsaacLab. |
| 7 | +# It runs tests on an EC2 instance with GPU support and uses Docker BuildKit |
| 8 | +# for efficient builds with S3 caching. |
| 9 | +# |
| 10 | +# Required environment variables: |
| 11 | +# - ISAACSIM_BASE_IMAGE: Base image for IsaacSim |
| 12 | +# - ISAACSIM_BASE_VERSION: Version of IsaacSim to use |
| 13 | +# |
| 14 | +# Required AWS Secrets: |
| 15 | +# - NGC_TOKEN: NVIDIA NGC authentication token |
| 16 | +# - SSH_KEY: SSH private key for EC2 access |
| 17 | +# - SSH_PUBLIC_KEY: SSH public key for EC2 access |
| 18 | + |
1 | 19 | version: 0.2
|
2 | 20 |
|
# Build-wide settings shared by every phase below.
env:
  variables:
    # Retry policy for the initial source upload (scp) in the build phase:
    # maximum number of attempts and the base wait in seconds.
    MAX_RETRIES: '5'
    RETRY_WAIT_TIME: '30'

    # EC2 test instance: type, root volume size (GiB), region and
    # availability zone passed to the aws CLI in pre_build.
    INSTANCE_TYPE: 'g5.2xlarge'
    VOLUME_SIZE: '500'
    REGION: 'us-west-2'
    AZ: 'us-west-2a'

    # ECR repository name and the prefix of the S3 BuildKit cache bucket
    # (the AWS account ID is appended to the prefix in pre_build).
    ECR_REPOSITORY: 'isaaclab-dev'
    CACHE_BUCKET_PREFIX: 'isaaclab-build-cache'

    # Docker Buildx release downloaded onto the test instance
    BUILDX_VERSION: '0.11.2'

  # Values resolved from AWS Secrets Manager at build start.
  # NOTE(review): SSH_PUBLIC_KEY points at the same secret as SSH_KEY and no
  # phase command appears to read it - confirm whether it is still needed.
  secrets-manager:
    NGC_TOKEN: 'production/ngc/token'
    SSH_KEY: 'production/ssh/isaaclab'
    SSH_PUBLIC_KEY: 'production/ssh/isaaclab'
| 44 | + |
3 | 45 | phases:
|
# install phase: select the Python runtime and install the AWS tooling used
# by the later phases.
  install:
    runtime-versions:
      python: 3.9
    commands:
      - |
        echo "Installing required packages..."
        pip install awscli boto3
| 52 | + |
# pre_build phase: validate inputs, make sure the ECR repository and the S3
# BuildKit cache bucket exist (each with a 14-day expiry policy), launch the
# GPU test instance and prepare SSH access to it.
#
# Exports for later phases (all commands share one shell in buildspec 0.2):
#   AWS_ACCOUNT_ID, CACHE_BUCKET, INSTANCE_ID, EC2_INSTANCE_IP
  pre_build:
    commands:
      - |
        # Validate required environment variables
        if [ -z "$ISAACSIM_BASE_IMAGE" ]; then
          echo "Error: Required environment variable ISAACSIM_BASE_IMAGE is not set"
          exit 1
        fi
        if [ -z "$ISAACSIM_BASE_VERSION" ]; then
          echo "Error: Required environment variable ISAACSIM_BASE_VERSION is not set"
          exit 1
        fi

        # Get AWS account ID
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        if [ -z "$AWS_ACCOUNT_ID" ]; then
          echo "Error: Failed to get AWS account ID"
          exit 1
        fi

        # Create ECR repository if it doesn't exist
        aws ecr describe-repositories --repository-names "$ECR_REPOSITORY" || \
          aws ecr create-repository --repository-name "$ECR_REPOSITORY"

        # Configure ECR repository lifecycle policy
        aws ecr put-lifecycle-policy \
          --repository-name "$ECR_REPOSITORY" \
          --lifecycle-policy-text '{
            "rules": [
              {
                "rulePriority": 1,
                "description": "Expire images older than 2 weeks",
                "selection": {
                  "tagStatus": "any",
                  "countType": "sinceImagePushed",
                  "countUnit": "days",
                  "countNumber": 14
                },
                "action": {
                  "type": "expire"
                }
              }
            ]
          }'

        # Create S3 bucket for BuildKit cache if it doesn't exist
        CACHE_BUCKET="${CACHE_BUCKET_PREFIX}-${AWS_ACCOUNT_ID}"
        aws s3api head-bucket --bucket "$CACHE_BUCKET" || \
          aws s3 mb "s3://$CACHE_BUCKET" --region "$REGION"

        # Configure S3 bucket lifecycle rule for cache expiration
        aws s3api put-bucket-lifecycle-configuration \
          --bucket "$CACHE_BUCKET" \
          --lifecycle-configuration '{
            "Rules": [
              {
                "ID": "ExpireBuildKitCache",
                "Status": "Enabled",
                "Filter": {
                  "Prefix": ""
                },
                "Expiration": {
                  "Days": 14
                }
              }
            ]
          }'

        # Terminate the instance before aborting. A failed pre_build is
        # expected to skip post_build (see the CodeBuild phase-transition
        # documentation), so the instance would otherwise keep running.
        abort_and_terminate() {
          echo "Error: $1"
          aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" || true
          exit 1
        }

        echo "Launching EC2 instance to run tests..."
        INSTANCE_ID=$(aws ec2 run-instances \
          --image-id ami-0e6cc441f9f4caab3 \
          --count 1 \
          --instance-type "$INSTANCE_TYPE" \
          --key-name production/ssh/isaaclab \
          --security-group-ids sg-02617e4b8916794c4 \
          --subnet-id subnet-0907ceaeb40fd9eac \
          --iam-instance-profile Name="IsaacLabBuildRole" \
          --block-device-mappings "[{\"DeviceName\":\"/dev/sda1\",\"Ebs\":{\"VolumeSize\":$VOLUME_SIZE}}]" \
          --output text \
          --query 'Instances[0].InstanceId')
        # The aws CLI prints "None" when the queried field is missing.
        if [ -z "$INSTANCE_ID" ] || [ "$INSTANCE_ID" = "None" ]; then
          echo "Error: Failed to launch EC2 instance"
          exit 1
        fi

        echo "Waiting for instance $INSTANCE_ID to be running..."
        aws ec2 wait instance-running --instance-ids "$INSTANCE_ID" || \
          abort_and_terminate "Instance $INSTANCE_ID did not reach the running state"

        echo "Getting instance IP address..."
        EC2_INSTANCE_IP=$(aws ec2 describe-instances \
          --filters "Name=instance-state-name,Values=running" "Name=instance-id,Values=$INSTANCE_ID" \
          --query 'Reservations[*].Instances[*].[PrivateIpAddress]' \
          --output text)
        if [ -z "$EC2_INSTANCE_IP" ] || [ "$EC2_INSTANCE_IP" = "None" ]; then
          abort_and_terminate "Failed to determine the private IP of instance $INSTANCE_ID"
        fi

        echo "Setting up SSH configuration..."
        mkdir -p ~/.ssh
        aws ec2 describe-key-pairs --include-public-key --key-name production/ssh/isaaclab \
          --query 'KeyPairs[0].PublicKey' --output text > ~/.ssh/id_rsa.pub
        echo "$SSH_KEY" > ~/.ssh/id_rsa
        chmod 400 ~/.ssh/id_*
        # printf instead of echo: whether echo interprets \n and \t depends on
        # the shell, printf always does.
        printf 'Host %s\n\tStrictHostKeyChecking no\n\tUserKnownHostsFile=/dev/null\n\n' \
          "$EC2_INSTANCE_IP" >> ~/.ssh/config

        echo "Sending SSH public key to instance..."
        aws ec2-instance-connect send-ssh-public-key \
          --instance-id "$INSTANCE_ID" \
          --availability-zone "$AZ" \
          --ssh-public-key file://~/.ssh/id_rsa.pub \
          --instance-os-user ubuntu || \
          abort_and_terminate "Failed to send the SSH public key to instance $INSTANCE_ID"
|
| 157 | +
|
# build phase: copy the checked-out sources to the EC2 instance, build the
# Docker image there with Buildx (registry cache-from when a previous image
# exists, S3 cache-to), run the pytest suite inside the container and copy the
# JUnit report plus the test exit code back to CodeBuild.
#
# Quoting rule for the remote script: it is one double-quoted ssh argument, so
# an unescaped $VAR is expanded by CodeBuild before the script is sent, while
# \$VAR and \$(...) are evaluated on the instance. Do not use bare double
# quotes inside it - they would end the argument early.
  build:
    commands:
      - |
        set -e

        echo "Running tests on EC2 instance..."
        SRC_DIR=$(basename "$CODEBUILD_SRC_DIR")
        cd ..

        # Retry SCP with exponential backoff (no wait before the first attempt)
        retry_count=0
        wait_time=$RETRY_WAIT_TIME

        while [ "$retry_count" -lt "$MAX_RETRIES" ]; do
          if [ "$retry_count" -gt 0 ]; then
            wait_time=$((wait_time * 2))
            echo "Retry attempt $((retry_count + 1))/$MAX_RETRIES. Waiting $wait_time seconds..."
            sleep $wait_time
          fi

          if scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no -r "$SRC_DIR" ubuntu@$EC2_INSTANCE_IP:~; then
            echo "SCP command succeeded"
            break
          fi

          retry_count=$((retry_count + 1))
        done

        # The counter only reaches MAX_RETRIES when every attempt failed.
        if [ "$retry_count" -eq "$MAX_RETRIES" ]; then
          echo "SCP command failed after $MAX_RETRIES attempts"
          exit 1
        fi

        # Get ECR login token
        ECR_LOGIN_TOKEN=$(aws ecr get-login-password --region "$REGION")

        # Run tests with proper error handling and Docker caching
        ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no ubuntu@$EC2_INSTANCE_IP "
          set -e

          # Install Docker with BuildKit support
          echo 'Installing Docker with BuildKit support...'
          sudo apt-get update
          sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common
          curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
          sudo add-apt-repository \"deb [arch=amd64] https://download.docker.com/linux/ubuntu \$(lsb_release -cs) stable\"
          sudo apt-get update
          sudo apt-get install -y docker-ce docker-ce-cli containerd.io

          # Enable BuildKit at daemon level
          sudo mkdir -p /etc/docker
          echo '{\"features\":{\"buildkit\":true}}' | sudo tee /etc/docker/daemon.json

          # Install Docker Buildx
          echo 'Installing Docker Buildx...'
          mkdir -p ~/.docker/cli-plugins/
          curl -SL https://github.com/docker/buildx/releases/download/v$BUILDX_VERSION/buildx-v$BUILDX_VERSION.linux-amd64 -o ~/.docker/cli-plugins/docker-buildx
          chmod a+x ~/.docker/cli-plugins/docker-buildx

          # Add current user to docker group
          # NOTE(review): newgrp starts a child shell, so in this non-interactive
          # script it may not grant the docker group to the commands below - verify.
          sudo usermod -aG docker ubuntu
          newgrp docker

          # Pass the NGC token on stdin so it does not show up in the docker
          # command line
          echo 'Logging into NGC...'
          echo \"$NGC_TOKEN\" | docker login -u '\$oauthtoken' --password-stdin nvcr.io

          # Login to ECR using token from CodeBuild
          echo \"$ECR_LOGIN_TOKEN\" | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com

          cd $SRC_DIR
          echo 'Building Docker image with BuildKit caching...'

          # Configure BuildKit environment
          export DOCKER_BUILDKIT=1
          export BUILDKIT_INLINE_CACHE=1

          # Create a new builder instance with S3 cache support
          docker buildx create --name mybuilder --driver docker-container --bootstrap
          docker buildx use mybuilder

          # Use the previously pushed image as a cache source when it exists
          CACHE_FROM_ARGS=''
          if docker pull $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:latest 2>/dev/null; then
            echo 'Using existing image for cache...'
            CACHE_FROM_ARGS='--cache-from type=registry,ref=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:latest'
          else
            echo 'No existing image found, building without cache-from...'
          fi

          # Build with BuildKit and S3 cache. CACHE_FROM_ARGS is deliberately
          # unquoted so it splits into two arguments, or into none when empty.
          docker buildx build --progress=plain --platform linux/amd64 -t isaac-lab-dev \
            \$CACHE_FROM_ARGS \
            --cache-to type=s3,region=$REGION,bucket=$CACHE_BUCKET,mode=max,ignore-error=true \
            --build-arg ISAACSIM_BASE_IMAGE_ARG=$ISAACSIM_BASE_IMAGE \
            --build-arg ISAACSIM_VERSION_ARG=$ISAACSIM_BASE_VERSION \
            --build-arg ISAACSIM_ROOT_PATH_ARG=/isaac-sim \
            --build-arg ISAACLAB_PATH_ARG=/workspace/isaaclab \
            --build-arg DOCKER_USER_HOME_ARG=/root \
            -f docker/Dockerfile.base \
            --load .

          echo 'Running tests...'
          # Capture the container status instead of aborting under set -e, so
          # the result can still be reported back to CodeBuild.
          TEST_EXIT_CODE=0
          docker run --rm --entrypoint bash --gpus all --network=host \
            --name isaac-lab-test -v ~/$SRC_DIR/reports:/workspace/IsaacLab/tests isaac-lab-dev \
            /isaac-sim/python.sh -m \
            pytest tools -v || TEST_EXIT_CODE=\$?

          # The file holds only the number; CodeBuild compares it with 0 below.
          echo \"Test exit code: \$TEST_EXIT_CODE\"
          echo \"\$TEST_EXIT_CODE\" > ~/$SRC_DIR/test_exit_code.txt
        " || { echo "Test execution failed"; exit 1; }

        echo "Copying test reports..."
        mkdir -p "$CODEBUILD_SRC_DIR/reports"
        scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no -r ubuntu@$EC2_INSTANCE_IP:~/$SRC_DIR/reports/test-reports.xml "$CODEBUILD_SRC_DIR/reports/"
        scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no ubuntu@$EC2_INSTANCE_IP:~/$SRC_DIR/test_exit_code.txt "$CODEBUILD_SRC_DIR/"

        TEST_EXIT_CODE=$(cat "$CODEBUILD_SRC_DIR/test_exit_code.txt")
        if [ "$TEST_EXIT_CODE" != "0" ]; then
          echo "Tests failed with exit code $TEST_EXIT_CODE"
          exit 1
        fi
79 | 284 |
|
# post_build phase: terminate the EC2 test instance if one was launched.
# Termination is best-effort: a failing terminate call does not fail the phase.
  post_build:
    commands:
      - |
        echo "Cleaning up resources..."
        if test -n "$INSTANCE_ID"; then
          echo "Terminating EC2 instance $INSTANCE_ID..."
          aws ec2 terminate-instances --instance-ids $INSTANCE_ID || true
        fi
| 293 | +
|
# Test report group: the JUnit XML file that the build phase copies back from
# the EC2 instance into reports/.
reports:
  pytest_reports:
    file-format: JUNITXML
    base-directory: "."
    files:
      - "reports/test-reports.xml"
| 300 | + |
# Paths preserved between builds by the CodeBuild cache.
# NOTE(review): /root/.aws can hold AWS CLI configuration or credentials;
# confirm that storing it in the build cache is intended.
cache:
  paths:
    - "/root/.cache/pip/**/*"
    - "/root/.docker/**/*"
    - "/root/.aws/**/*"
0 commit comments