From b217d4b37fa95d7751a5232111bd230dc2d88f77 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Thu, 9 May 2024 11:47:30 -0700 Subject: [PATCH] run README and quantization steps from docs on MPS (#723) * README and quantization steps from docs on MPS * device handling for aot load * add objcopy * fixes * fixes --- .github/workflows/run-readme-periodic.yml | 10 +-- .../workflows}/run-readme-pr-mps.yml | 85 ++++++++++--------- build/builder.py | 6 +- export.py | 2 +- 4 files changed, 53 insertions(+), 50 deletions(-) rename {parking_lot => .github/workflows}/run-readme-pr-mps.yml (51%) diff --git a/.github/workflows/run-readme-periodic.yml b/.github/workflows/run-readme-periodic.yml index 538b5f910..e55c2586c 100644 --- a/.github/workflows/run-readme-periodic.yml +++ b/.github/workflows/run-readme-periodic.yml @@ -2,7 +2,7 @@ name: Run the README instructions periodically to ensure they work on: schedule: - - cron: '0 0 * * *' # Runs daily at midnight UTC + - cron: '0 0 * * *' # Runs daily at midnight UTC push: tags: - ciflow/periodic/* @@ -22,10 +22,10 @@ jobs: uname -a echo "::endgroup::" - # echo "::group::Install newer objcopy that supports --set-section-alignment" - # yum install -y devtoolset-10-binutils - # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - # echo "::endgroup::" + echo "::group::Install newer objcopy that supports --set-section-alignment" + yum install -y devtoolset-10-binutils + export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH + echo "::endgroup::" # echo "::group::get_llama" # ( diff --git a/parking_lot/run-readme-pr-mps.yml b/.github/workflows/run-readme-pr-mps.yml similarity index 51% rename from parking_lot/run-readme-pr-mps.yml rename to .github/workflows/run-readme-pr-mps.yml index 3a49f10f3..bb1acd0e2 100644 --- a/parking_lot/run-readme-pr-mps.yml +++ b/.github/workflows/run-readme-pr-mps.yml @@ -49,45 +49,46 @@ jobs: echo "*******************************************" echo "::endgroup::" - test-quantization-mps-macos: - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-m1-stable # neeps MPS, was macos-m1-stable - script: | - set -x - conda create -y -n test-quantization-mps-macos python=3.10.11 - conda activate test-quantization-mps-macos - # NS: Remove previous installation of torch first - # as this script does not isntall anything into conda env but rather as system dep - pip3 uninstall -y torch || true - set -eou pipefail - - echo "::group::Print machine info" - uname -a - sysctl machdep.cpu.brand_string - sysctl machdep.cpu.core_count - echo "::endgroup::" - - # echo "::group::Install newer objcopy that supports --set-section-alignment" - # yum install -y devtoolset-10-binutils - # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - # echo "::endgroup::" - - echo "::group::Create script to run quantization" - python3 scripts/updown.py --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-quantization.sh - echo "::endgroup::" - - echo "::group::Run quantization" - echo "*******************************************" - cat ./run-quantization.sh - echo "*******************************************" - bash -x ./run-quantization.sh - echo "::endgroup::" - - echo "::group::Completion" - echo "tests complete" - echo "*******************************************" - echo "::endgroup::" +# test-quantization-mps-macos: +# uses: pytorch/test-infra/.github/workflows/macos_job.yml@main +# with: +# runner: macos-m1-stable # neeps MPS, was macos-m1-stable +# script: | +# set -x +# conda create -y -n test-quantization-mps-macos python=3.10.11 +# conda activate test-quantization-mps-macos +# # NS: Remove previous installation of torch first +# # as this script does not isntall anything into conda env but rather as system dep +# pip3 uninstall -y torch || true +# set -eou pipefail +# +# echo "::group::Print machine info" +# uname -a +# sysctl machdep.cpu.brand_string +# sysctl machdep.cpu.core_count +# echo "::endgroup::" +# +# # echo "::group::Install newer objcopy that supports --set-section-alignment" +# # yum install -y devtoolset-10-binutils +# # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH +# # echo "::endgroup::" +# +# echo "::group::Create script to run quantization" +# python3 scripts/updown.py --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh +# # for good measure, if something happened to updown processor, +# # and it did not error out, fail with an exit 1 +# echo "exit 1" >> ./run-quantization.sh +# echo "::endgroup::" +# +# echo "::group::Run quantization" +# echo "*******************************************" +# cat ./run-quantization.sh +# echo "*******************************************" +# bash -x ./run-quantization.sh +# echo "::endgroup::" +# +# echo "::group::Completion" +# echo "tests complete" +# echo "*******************************************" +# echo "::endgroup::" +# \ No newline at end of file diff --git a/build/builder.py b/build/builder.py index 60c833361..e094b4e7a 100644 --- a/build/builder.py +++ b/build/builder.py @@ -382,9 +382,11 @@ def _initialize_model( try: if "mps" in builder_args.device: - print("Warning: MPS currently does not support DSO models. Trying to load for CPU.") + print( + "Cannot load specified DSO to MPS. Attempting to load model to CPU instead" + ) builder_args.device = "cpu" - + # Replace model forward with the AOT-compiled forward # This is a hacky way to quickly demo AOTI's capability. # model is still a Python object, and any mutation to its diff --git a/export.py b/export.py index 9509aceaa..d435c2ff5 100644 --- a/export.py +++ b/export.py @@ -51,7 +51,7 @@ def main(args): if output_dso_path and "mps" in builder_args.device: print("Warning! Device MPS not supported for export. Exporting for device CPU.") builder_args.device = "cpu" - + # TODO: clean this up # This mess is because ET does not support _weight_int4pack_mm right now if not builder_args.gguf_path: