# Sync bddl Google Sheets data #115
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Scheduled workflow that refreshes the BDDL generated data files:
#   1. obtains two asset_pipeline artifacts (from the Actions cache or via DVC),
#   2. merges the per-object complaint files into a single JSON file,
#   3. re-pulls the Google Sheets data and regenerates the derivative data,
#   4. verifies the knowledge base still imports, then commits the changes.
name: Sync bddl Google Sheets data

on:
  schedule:
    - cron: "0 9 * * *"  # Every day at 9am (GitHub evaluates cron schedules in UTC)
  workflow_dispatch:
  # push:
  #   branches:
  #     - main

jobs:
  pull-sheets:
    name: Sync Google Sheets data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"  # quoted so YAML does not read it as the float 3.1
          architecture: x64

      - name: Authenticate on Google Cloud
        uses: 'google-github-actions/auth@v1'
        with:
          credentials_json: '${{ secrets.GCP_CREDENTIALS }}'

      # NOTE(review): the version pin of this action was lost in the extracted
      # source (the "name@version" text was replaced by an e-mail-protection
      # placeholder). v0.9.0 is assumed here -- confirm against the repository.
      - name: Load SSH deploy key
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: '${{ secrets.SHEETS_DEPLOY_KEY }}'

      # See if we need to re-pull any asset_pipeline data from DVC.
      # The cache key is derived from dvc.lock, so the key changes whenever
      # that lock file changes.
      - name: Check cache for pipeline data
        id: cache-pipeline
        uses: actions/cache@v4
        with:
          key: asset_pipeline-${{ hashFiles('asset_pipeline/dvc.lock') }}
          path: |
            asset_pipeline/artifacts/pipeline/combined_room_object_list.json
            asset_pipeline/artifacts/pipeline/object_inventory.json

      # The next three steps only run when the cache lookup above missed.
      - name: Install dvc
        if: ${{ steps.cache-pipeline.outputs.cache-hit != 'true' }}
        # Quoted so the shell never treats the brackets as a glob pattern.
        run: pip install 'dvc[gs]'

      - name: Pull dvc data
        if: ${{ steps.cache-pipeline.outputs.cache-hit != 'true' }}
        working-directory: asset_pipeline
        run: dvc pull combined_room_object_list object_inventory

      - name: Unprotect data
        if: ${{ steps.cache-pipeline.outputs.cache-hit != 'true' }}
        working-directory: asset_pipeline
        run: >-
          dvc unprotect
          artifacts/pipeline/combined_room_object_list.json
          artifacts/pipeline/object_inventory.json

      - name: Copy over pipeline files
        run: >-
          cp asset_pipeline/artifacts/pipeline/{combined_room_object_list,object_inventory}.json
          bddl/bddl/generated_data

      # Concatenates every asset_pipeline/cad/*/*/complaints.json list into one
      # list, sorts it on a fixed tuple of fields so the output order is stable
      # between runs, and writes it to bddl/bddl/generated_data/complaints.json.
      - name: Combine complaint files from asset_pipeline
        run: |
          python3 -c "
          import glob, json
          files = glob.glob('asset_pipeline/cad/*/*/complaints.json')
          combined = []
          for file in files:
              with open(file) as f:
                  combined.extend(json.load(f))
          combined.sort(
              key=lambda x: (
                  x['object'], x['type'], x['additional_info'],
                  x['complaint'], x['processed'],
              )
          )
          with open('bddl/bddl/generated_data/complaints.json', 'w') as f:
              json.dump(combined, f, indent=2)
          "

      - name: Install BDDL
        working-directory: bddl
        run: pip install -e .

      - name: Install dev requirements
        working-directory: bddl
        run: pip install -r requirements-dev.txt

      - name: Refresh sheets data
        working-directory: bddl
        run: python -m bddl.data_generation.pull_sheets

      - name: Refresh derivative data
        working-directory: bddl
        run: python -m bddl.data_generation.generate_datafiles

      # We want to check if the knowledgebase imports correctly. If it does
      # not, this step fails and the job stops before the auto-commit step
      # below, so the refreshed data is NOT committed: committing it would
      # result in a permanently broken knowledgebase that blocks sampling and
      # the website.
      - name: Test if knowledgebase loads OK
        run: python -c "from bddl.knowledge_base import *"

      - name: Commit refreshed data
        uses: stefanzweifel/git-auto-commit-action@v4
        with:
          commit_message: "Sync bddl Google Sheets data"