# Sync bddl Google Sheets data #115

# Pulls the bddl Google Sheets data plus selected asset_pipeline artifacts,
# regenerates the derived data files under bddl/bddl/generated_data, verifies
# that the knowledge base still imports, and commits the result.
name: Sync bddl Google Sheets data

on:
  schedule:
    # GitHub Actions evaluates cron expressions in UTC.
    - cron: "0 9 * * *" # Every day at 09:00 UTC
  # Allows the workflow to be started manually.
  workflow_dispatch:
  # push:
  #   branches:
  #     - main

jobs:
  pull-sheets:
    name: Sync Google Sheets data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Setup python
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"
          architecture: x64

      - name: Authenticate on Google Cloud
        uses: 'google-github-actions/auth@v1'
        with:
          credentials_json: '${{ secrets.GCP_CREDENTIALS }}'

      # NOTE(review): the version tag of this action was garbled in the source
      # this file was recovered from; v0.9.0 is an existing release of
      # webfactory/ssh-agent -- confirm it matches the intended pin.
      - uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.SHEETS_DEPLOY_KEY }}

      # See if we need to re-pull any asset_pipeline data from DVC.
      # The cache key follows asset_pipeline/dvc.lock, so the three DVC steps
      # below only run when that lock file changed (cache miss).
      - name: Check cache for pipeline data
        id: cache-pipeline
        uses: actions/cache@v3
        with:
          key: asset_pipeline-${{ hashFiles('asset_pipeline/dvc.lock') }}
          path: |
            asset_pipeline/artifacts/pipeline/combined_room_object_list.json
            asset_pipeline/artifacts/pipeline/object_inventory.json

      - name: Install dvc
        if: ${{ steps.cache-pipeline.outputs.cache-hit != 'true' }}
        # Quoted so the shell cannot treat "[gs]" as a glob character class.
        run: pip install "dvc[gs]"

      - name: Pull dvc data
        if: ${{ steps.cache-pipeline.outputs.cache-hit != 'true' }}
        working-directory: asset_pipeline
        run: dvc pull combined_room_object_list object_inventory

      - name: Unprotect data
        if: ${{ steps.cache-pipeline.outputs.cache-hit != 'true' }}
        working-directory: asset_pipeline
        run: >-
          dvc unprotect
          artifacts/pipeline/combined_room_object_list.json
          artifacts/pipeline/object_inventory.json

      # Relies on shell brace expansion to copy both JSON files in one command.
      - name: Copy over pipeline files
        run: >-
          cp
          asset_pipeline/artifacts/pipeline/{combined_room_object_list,object_inventory}.json
          bddl/bddl/generated_data

      # Concatenates every per-directory complaints.json into one list, sorted
      # on a fixed key tuple, and writes it next to the other generated data.
      # The Python source must start at column 0 of the block scalar, so it is
      # aligned with the `python3 -c` line.
      - name: Combine complaint files from asset_pipeline
        run: |
          python3 -c "
          import glob, json
          files = glob.glob('asset_pipeline/cad/*/*/complaints.json')
          combined = []
          for file in files:
              with open(file) as f:
                  combined.extend(json.load(f))
          combined.sort(key=lambda x: (x['object'], x['type'], x['additional_info'], x['complaint'], x['processed']))
          with open('bddl/bddl/generated_data/complaints.json', 'w') as f:
              json.dump(combined, f, indent=2)
          "

      - name: Install BDDL
        working-directory: bddl
        run: pip install -e .

      - name: Install dev requirements
        working-directory: bddl
        run: pip install -r requirements-dev.txt

      - name: Refresh sheets data
        working-directory: bddl
        run: python -m bddl.data_generation.pull_sheets

      - name: Refresh derivative data
        working-directory: bddl
        run: python -m bddl.data_generation.generate_datafiles

      # We want to check if the knowledgebase imports correctly, and if not, we
      # want to NOT do the pull because it will result in a permanently broken
      # knowledgebase that blocks sampling and website.
      - name: Test if knowledgebase loads OK
        run: python -c "from bddl.knowledge_base import *"

      # Only reached when every previous step succeeded, so a failing import
      # check above prevents the refreshed data from being committed.
      - uses: stefanzweifel/git-auto-commit-action@v4
        with:
          commit_message: "Sync bddl Google Sheets data"