Skip to content

[ci rerun-skip] replace pocket interactions dataset with newtab visit… #79

[ci rerun-skip] replace pocket interactions dataset with newtab visit…

[ci rerun-skip] replace pocket interactions dataset with newtab visit… #79

# Workflow header: runs on pushes to main that touch anything under jetstream/.
name: 'Rerun jetstream'
on:
  push:
    # Only the main branch triggers a rerun.
    branches: [main]
    # Ignore pushes that do not modify files below jetstream/.
    paths: ['jetstream/**']
jobs:
  # Calls the repository's reusable changed-files workflow, restricted to
  # jetstream/**. Its outputs (any_changed, all_changed_files) are consumed by
  # the rerun-jetstream job below.
  changed:
    uses: ./.github/workflows/changed-files.yml
    with:
      path_filter: |
        jetstream/**

  # Starts a one-off jetstream pod on the GKE cluster for the experiment
  # configs touched by the push, waits for it to finish and fails the job if
  # the pod did not succeed.
  rerun-jetstream:
    runs-on: ubuntu-latest
    needs: changed
    permissions:
      contents: read
      # Lets the job request an OIDC token, which the workload-identity based
      # "Authenticate to GCP" step relies on.
      id-token: write
    if: needs.changed.outputs.any_changed == 'true'
    env:
      CLUSTER_NAME: 'jetstream-dev'
      CLUSTER_ZONE: 'us-central1-c'
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Authenticate to GCP
        uses: google-github-actions/auth@v2
        id: google_auth
        with:
          workload_identity_provider: ${{ vars.GCPV2_GITHUB_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ secrets.GCP_JETSTREAM_SERVICE_ACCOUNT_EMAIL }}
          project_id: ${{ vars.GCLOUD_PROJECT }}
          # 12 hours.
          # NOTE(review): presumably only honoured together with
          # `token_format: access_token` - confirm against the auth action docs.
          access_token_lifetime: 43200s

      - name: Set up GKE
        uses: google-github-actions/get-gke-credentials@v2
        with:
          cluster_name: ${{ env.CLUSTER_NAME }}
          location: ${{ env.CLUSTER_ZONE }}

      - name: Rerun jetstream
        shell: bash
        env:
          # Passed through the environment (not inlined into the script) so
          # that the commit message cannot inject shell code.
          changed_files: ${{ needs.changed.outputs.all_changed_files }}
          commit_message: ${{ github.event.head_commit.message }}
        run: |
          ERROR_DASHBOARD_URL="https://mozilla.cloud.looker.com/dashboards/246"

          # determine slugs of configs that got changed; filter out outcomes and defaults
          # (only files that exist directly as jetstream/<slug>.toml are kept)
          params=()
          # $changed_files is intentionally unquoted: it is a whitespace-separated list.
          for config in $changed_files; do
            slug=${config##*/}
            slug="${slug%.*}"
            if [ -e "jetstream/${slug}.toml" ]; then
              params+=("--experiment_slug=${slug}")
            fi
          done
          echo "Latest change affected these experiments:"
          echo "${params[*]}"

          # stop running instances so that they do not interfere
          # NOTE(review): the identifier is freshly generated here, so this
          # selector most likely matches no existing pod - confirm whether a
          # broader selector was intended.
          uuid=$(uuidgen)
          pod_identifier="jetstream-${uuid}"
          echo "Deleting pod [$pod_identifier]"
          echo
          kubectl delete pod -l "app=$pod_identifier"

          # Timestamp used as the cursor position in the Cloud Logging link.
          cur_date=$(date -u +%FT%TZ)
          LOGS_URL="https://console.cloud.google.com/logs/query;query=resource.type%3D%22k8s_container%22%0Aresource.labels.cluster_name%3D%22${{ env.CLUSTER_NAME }}%22%0Aresource.labels.location%3D%22${{ env.CLUSTER_ZONE }}%22%0Aresource.labels.namespace_name%3D%22argo%22;cursorTimestamp=${cur_date}?project=${{ vars.GCLOUD_PROJECT }}"

          echo "Checking commit message: $commit_message"
          if [[ "$commit_message" == *"[ci rerun-skip]"* ]] \
            || [[ "$commit_message" == *"[ci rerun_skip]"* ]] \
            || [[ "$commit_message" == *"[ci skip-rerun]"* ]] \
            || [[ "$commit_message" == *"[ci skip_rerun]"* ]]; then
            echo "Skip rerun for files:"
            echo "$changed_files"
            echo "[$(date +%FT%T%Z)] Running command: 'jetstream rerun-skip ${params[*]}'"
            # start a new instance
            kubectl run "$pod_identifier" --image="gcr.io/${{ vars.GCLOUD_PROJECT }}/jetstream" -l "app=$pod_identifier" --restart=Never --command -- jetstream rerun-skip "${params[@]}"
          else
            echo "Rerun changed files: "
            echo "$changed_files"
            echo "[$(date +%FT%T%Z)] Running command: 'jetstream rerun --argo --return-status --recreate-enrollments ${params[*]}'"
            # start a new instance
            kubectl run "$pod_identifier" --image="gcr.io/${{ vars.GCLOUD_PROJECT }}/jetstream" -l "app=$pod_identifier" --restart=Never --command -- jetstream rerun --argo --return-status --recreate-enrollments "${params[@]}"
            # link to logs
            echo "Pod Logs: $LOGS_URL"
            echo "Analysis Errors can be accessed via Looker: $ERROR_DASHBOARD_URL"
          fi

          # wait for pod to finish and check status
          pod_status=""
          while true; do
            echo "[$(date +%FT%T%Z)] Wait for jetstream to finish"
            pod_status=$(kubectl get pod -l "app=$pod_identifier" --no-headers -o custom-columns=":status.phase")
            # Drop any whitespace padding around the single status column.
            pod_status="${pod_status//[[:space:]]/}"
            if [ "$pod_status" = 'Succeeded' ] || [ "$pod_status" = 'Failed' ]; then
              break
            fi
            # No output means no pod matches the label any more; polling further
            # would never terminate, so fall through to the unknown-status path.
            if [ -z "$pod_status" ]; then
              echo "[$(date +%FT%T%Z)] Pod [$pod_identifier] not found; stop waiting."
              break
            fi
            sleep 10
          done

          # delete pod
          echo "Deleting pod [$pod_identifier]"
          echo
          kubectl delete pod -l "app=$pod_identifier"

          if [ "$pod_status" = 'Failed' ]; then
            echo "[$(date +%FT%T%Z)] Error when running jetstream. Check the pod logs or error dashboard for more information."
            echo "Pod Logs: $LOGS_URL"
            echo "Analysis Errors dashboard: $ERROR_DASHBOARD_URL"
            exit 1
          elif [ "$pod_status" = 'Succeeded' ]; then
            echo "[$(date +%FT%T%Z)] Jetstream successfully completed."
          else
            echo "[$(date +%FT%T%Z)] Jetstream completed in unknown status. Please check the pod logs or error dashboard."
            echo "Pod Logs: $LOGS_URL"
            echo "Analysis Errors dashboard: $ERROR_DASHBOARD_URL"
            exit 1
          fi