feat: add planning result logging (#4022) #6410
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow creates a conda environment for each Python version in the
# matrix, installs torch, fbgemm-gpu and the project requirements, builds the
# wheel, and runs the pytest suite.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: "CUDA Unit Test CI"
# Triggers: pushes to `nightly`/`main`, every pull request, and manual dispatch.
# Both `paths-ignore` lists are identical: a push or PR whose changes touch only
# these paths does not start the workflow.
on:
  push:
    branches:
      # only run tests on main branch & nightly; release should be triggered manually
      - "nightly"
      - "main"
    paths-ignore:
      - "docs/*"
      - "third_party/*"
      - ".gitignore"
      - "*.md"
      # workflow files whose name starts with b, c, d, p, r or v
      - ".github/workflows/[bcdprv]*.yml"
      - ".github/workflows/unittest_ci_cpu.yml"
      - ".github/scripts/*.sh"
      - ".github/scripts/*.py"
  pull_request:
    paths-ignore:
      - "docs/*"
      - "third_party/*"
      - ".gitignore"
      - "*.md"
      # workflow files whose name starts with b, c, d, p, r or v
      - ".github/workflows/[bcdprv]*.yml"
      - ".github/workflows/unittest_ci_cpu.yml"
      - ".github/scripts/*.sh"
      - ".github/scripts/*.py"
  workflow_dispatch:
    inputs:
      # Selects which download.pytorch.org wheel index the job script installs from.
      channel:
        description: "Channel to use for torch and fbgemm"
        required: true
        type: choice
        options:
          - "release"
          - "nightly"
          - "test"
jobs:
  # Builds torchrec and runs its pytest suite through the reusable
  # pytorch/test-infra Linux job, once per surviving matrix combination.
  unittest_ci_gpu:
    strategy:
      fail-fast: false
      matrix:
        cuda-tag: ["cu126", "cu128", "cu129", "cu130"]
        os:
          - linux.g5.12xlarge.nvidia.gpu
        python:
          - version: "3.10"
            tag: "py310"
          - version: "3.11"
            tag: "py311"
          - version: "3.12"
            tag: "py312"
          - version: "3.13"
            tag: "py313"
          - version: "3.14"
            tag: "py314"
          # free-threaded 3.14 build; the script installs `python-freethreading`
          - version: "3.14"
            tag: "py314"
            free_threaded: true
        # Single-valued axes that exist only so `exclude` can key on the event.
        is_pr:
          - ${{ github.event_name == 'pull_request' }}
        is_main_push: # for main branch
          - ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
        # When neither flag is true (nightly-branch push, manual dispatch) no
        # exclude entry applies and the full matrix runs.
        exclude:
          # Pull requests: drop cu126/cu128/cu129 entirely, and for cu130 drop
          # Python 3.10-3.13, so only the 3.14 entries remain.
          - is_pr: true
            cuda-tag: "cu126"
          - is_pr: true
            cuda-tag: "cu128"
          - is_pr: true
            cuda-tag: "cu129"
          - is_pr: true
            cuda-tag: "cu130"
            python:
              version: "3.10"
          - is_pr: true
            cuda-tag: "cu130"
            python:
              version: "3.11"
          - is_pr: true
            cuda-tag: "cu130"
            python:
              version: "3.12"
          - is_pr: true
            cuda-tag: "cu130"
            python:
              version: "3.13"
          # Pushes to main: each older CUDA tag runs a reduced set of Python
          # versions; cu130 has no entry here and runs every Python entry.
          - is_main_push: true
            cuda-tag: "cu126"
            python:
              version: "3.11"
          - is_main_push: true
            cuda-tag: "cu126"
            python:
              version: "3.12"
          - is_main_push: true
            cuda-tag: "cu126"
            python:
              version: "3.13"
          - is_main_push: true
            cuda-tag: "cu128"
            python:
              version: "3.10"
          - is_main_push: true
            cuda-tag: "cu128"
            python:
              version: "3.12"
          - is_main_push: true
            cuda-tag: "cu128"
            python:
              version: "3.14"
          - is_main_push: true
            cuda-tag: "cu129"
            python:
              version: "3.11"
          - is_main_push: true
            cuda-tag: "cu129"
            python:
              version: "3.13"
          # NOTE(review): `free_threaded` is assumed to sit under `python`,
          # matching `matrix.python.free_threaded` used by the script - confirm.
          - is_main_push: true
            cuda-tag: "cu129"
            python:
              version: "3.14"
              free_threaded: true
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      runner: ${{ matrix.os }}
      timeout: 60
      script: |
        ldd --version

        # Create the conda env, using the free-threaded interpreter when requested.
        if [[ "${{ matrix.python.free_threaded }}" == "true" ]]; then
          conda create -y --name build_binary python-freethreading=${{ matrix.python.version }}
        else
          conda create -y --name build_binary python=${{ matrix.python.version }}
        fi

        # sys._is_gil_enabled() only exists on 3.13+, so gate on the version tuple
        # (a string comparison on sys.version orders "3.9" after "3.13").
        conda run -n build_binary \
          python -c "import sys; print(f'python GIL enabled: {sys._is_gil_enabled()}') if sys.version_info >= (3, 13) else print('python GIL enabled: true')"
        conda info
        python --version
        conda run -n build_binary python --version

        # Pick the wheel index: `release` uses the top-level index, an empty
        # channel (push / pull_request events) falls back to nightly, and any
        # other value is used as the sub-directory name.
        if [[ "${{ inputs.channel }}" = "release" ]]; then
          index_url=https://download.pytorch.org/whl/${{ matrix.cuda-tag }}
        elif [[ -z "${{ inputs.channel }}" ]]; then
          index_url=https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
        else
          index_url=https://download.pytorch.org/whl/${{ inputs.channel }}/${{ matrix.cuda-tag }}
        fi
        echo "index_url: $index_url"

        if [[ "${{ matrix.python.version }}" = "3.14" ]]; then
          # temporary workaround for torch package issue in python 3.14
          conda run -n build_binary pip install packaging
        fi

        conda run -n build_binary \
          pip install torch --index-url "$index_url"
        conda run -n build_binary \
          python -c "import torch; print(torch.__version__)"
        echo "torch succeeded"
        conda run -n build_binary \
          python -c "import torch.distributed"
        echo "torch.distributed succeeded"

        conda run -n build_binary \
          pip install fbgemm-gpu --index-url "$index_url"
        conda run -n build_binary \
          python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)"
        echo "fbgemm_gpu succeeded"

        conda run -n build_binary \
          pip install -r requirements.txt
        # NOTE(review): the wheel built here is not installed by this script;
        # the imports below presumably resolve against the checkout - confirm.
        conda run -n build_binary \
          python setup.py bdist_wheel \
          --python-tag=${{ matrix.python.tag }}
        conda run -n build_binary \
          python -c "import torchrec"
        echo "torchrec succeeded"
        conda run -n build_binary \
          python -c "import numpy"
        echo "numpy succeeded"

        conda run -n build_binary \
          pip install pytest
        # Run the suite, skipping the listed files/globs and any test whose name
        # contains `_disabled_in_oss_compatibility`.
        conda run -n build_binary \
          python -m pytest torchrec -v -s \
          -W ignore::pytest.PytestCollectionWarning \
          --continue-on-collection-errors \
          --ignore=torchrec/distributed/tests/test_comm.py \
          --ignore=torchrec/distributed/tests/test_infer_shardings.py \
          --ignore=torchrec/distributed/tests/test_keyed_jagged_tensor_pool.py \
          --ignore=torchrec/distributed/tests/test_pt2_multiprocess.py \
          --ignore=torchrec/distributed/tests/test_pt2.py \
          --ignore=torchrec/distributed/tests/test_quant_model_parallel.py \
          --ignore=torchrec/distributed/tests/test_quant_pruning.py \
          --ignore=torchrec/distributed/tests/test_quant_sequence_model_parallel.py \
          --ignore=torchrec/distributed/tests/test_cache_prefetch.py \
          --ignore=torchrec/distributed/tests/test_fp_embeddingbag_single_rank.py \
          --ignore=torchrec/distributed/tests/test_infer_utils.py \
          --ignore=torchrec/distributed/tests/test_fx_jit.py \
          --ignore=torchrec/distributed/tests/test_model_parallel_hierarchical.py \
          --ignore-glob='**/test_utils/' \
          --ignore-glob='torchrec/metrics/' \
          --ignore-glob='*test_train_pipeline*' \
          --ignore-glob='torchrec/distributed/tests/test_model_parallel_gloo*' \
          --ignore-glob='torchrec/inference/inference_legacy/tests*' \
          --ignore-glob='*test_model_parallel_nccl*' \
          -k "not _disabled_in_oss_compatibility"
# One in-flight run per workflow + PR number (or commit SHA when the event has
# no pull request); a newer run in the same group cancels the older one.
# The dispatch channel is part of the key so manual runs against different
# channels do not cancel each other. The key previously used
# `inputs.repository`, but this workflow declares no such input, so that
# segment always expanded to an empty string.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.channel }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true