
run README and quantization steps from docs on MPS (#723)
* README and quantization steps from docs on MPS

* device handling for aot load

* add objcopy

* fixes

* fixes
mikekgfb authored and malfet committed Jul 17, 2024
1 parent 823a543 commit b217d4b
Showing 4 changed files with 53 additions and 50 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/run-readme-periodic.yml
@@ -2,7 +2,7 @@ name: Run the README instructions periodically to ensure they work

on:
schedule:
- cron: '0 0 * * *' # Runs daily at midnight UTC
- cron: '0 0 * * *' # Runs daily at midnight UTC
push:
tags:
- ciflow/periodic/*
@@ -22,10 +22,10 @@ jobs:
uname -a
echo "::endgroup::"
# echo "::group::Install newer objcopy that supports --set-section-alignment"
# yum install -y devtoolset-10-binutils
# export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
# echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
# echo "::group::get_llama"
# (
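The re-enabled step above pulls in devtoolset-10 binutils and prepends it to PATH because the default objcopy on the runner presumably does not understand --set-section-alignment. As a side illustration only (not part of the workflow), a quick Python probe for whether the objcopy on PATH advertises that flag could look like this:

```python
import shutil
import subprocess


def objcopy_supports_section_alignment(objcopy: str = "objcopy") -> bool:
    """Return True if `objcopy --help` lists --set-section-alignment.

    Illustrative only: the workflow simply installs devtoolset-10-binutils
    and prepends /opt/rh/devtoolset-10/root/usr/bin to PATH instead of probing.
    """
    path = shutil.which(objcopy)
    if path is None:
        return False
    help_text = subprocess.run(
        [path, "--help"], capture_output=True, text=True, check=False
    ).stdout
    return "--set-section-alignment" in help_text


if __name__ == "__main__":
    print(objcopy_supports_section_alignment())
```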
@@ -49,45 +49,46 @@ jobs:
echo "*******************************************"
echo "::endgroup::"

test-quantization-mps-macos:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-stable # needs MPS, was macos-m1-stable
script: |
set -x
conda create -y -n test-quantization-mps-macos python=3.10.11
conda activate test-quantization-mps-macos
# NS: Remove previous installation of torch first
# as this script does not install anything into the conda env but rather as a system dep
pip3 uninstall -y torch || true
set -eou pipefail
echo "::group::Print machine info"
uname -a
sysctl machdep.cpu.brand_string
sysctl machdep.cpu.core_count
echo "::endgroup::"
# echo "::group::Install newer objcopy that supports --set-section-alignment"
# yum install -y devtoolset-10-binutils
# export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
# echo "::endgroup::"
echo "::group::Create script to run quantization"
python3 scripts/updown.py --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-quantization.sh
echo "::endgroup::"

echo "::group::Run quantization"
echo "*******************************************"
cat ./run-quantization.sh
echo "*******************************************"
bash -x ./run-quantization.sh
echo "::endgroup::"

echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
echo "::endgroup::"
# test-quantization-mps-macos:
# uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
# with:
# runner: macos-m1-stable # needs MPS, was macos-m1-stable
# script: |
# set -x
# conda create -y -n test-quantization-mps-macos python=3.10.11
# conda activate test-quantization-mps-macos
# # NS: Remove previous installation of torch first
# # as this script does not install anything into the conda env but rather as a system dep
# pip3 uninstall -y torch || true
# set -eou pipefail
#
# echo "::group::Print machine info"
# uname -a
# sysctl machdep.cpu.brand_string
# sysctl machdep.cpu.core_count
# echo "::endgroup::"
#
# # echo "::group::Install newer objcopy that supports --set-section-alignment"
# # yum install -y devtoolset-10-binutils
# # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
# # echo "::endgroup::"
#
# echo "::group::Create script to run quantization"
# python3 scripts/updown.py --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
# # for good measure, if something happened to updown processor,
# # and it did not error out, fail with an exit 1
# echo "exit 1" >> ./run-quantization.sh
# echo "::endgroup::"
#
# echo "::group::Run quantization"
# echo "*******************************************"
# cat ./run-quantization.sh
# echo "*******************************************"
# bash -x ./run-quantization.sh
# echo "::endgroup::"
#
# echo "::group::Completion"
# echo "tests complete"
# echo "*******************************************"
# echo "::endgroup::"
#
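The quantization job above drives CI straight from the documentation: scripts/updown.py turns docs/quantization.md into run-quantization.sh (with llama3 swapped for stories15M and the Hugging Face login steps suppressed), and an `exit 1` is then appended so a script that was generated incompletely fails the job instead of passing silently. A minimal sketch of that generate-then-guard idea, assuming the doc's fenced shell blocks are what gets extracted (the real updown.py also performs the --replace/--suppress rewriting, which is omitted here):

```python
import re
from pathlib import Path


def extract_shell_blocks(markdown: str) -> list[str]:
    """Collect the bodies of fenced shell code blocks from a markdown document.

    Simplified stand-in for scripts/updown.py: it ignores the --replace and
    --suppress rewriting that the real tool performs.
    """
    fence = "`" * 3  # literal triple backticks, spelled out to keep this example readable
    pattern = re.compile(fence + r"(?:bash|sh|shell)\n(.*?)\n" + fence, re.DOTALL)
    return [match.group(1).rstrip() for match in pattern.finditer(markdown)]


def write_runner(doc: Path, out: Path) -> None:
    blocks = extract_shell_blocks(doc.read_text())
    lines = ["#!/usr/bin/env bash", "set -xeuo pipefail", *blocks]
    # Assumption: a fully generated script ends by exiting cleanly, so the
    # `exit 1` that the workflow appends afterwards is only reached when
    # generation was cut short without raising an error.
    lines.append("exit 0")
    out.write_text("\n".join(lines) + "\n")


if __name__ == "__main__":
    write_runner(Path("docs/quantization.md"), Path("run-quantization.sh"))
```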
6 changes: 4 additions & 2 deletions build/builder.py
@@ -382,9 +382,11 @@ def _initialize_model(

try:
if "mps" in builder_args.device:
print("Warning: MPS currently does not support DSO models. Trying to load for CPU.")
print(
"Cannot load specified DSO to MPS. Attempting to load model to CPU instead"
)
builder_args.device = "cpu"

# Replace model forward with the AOT-compiled forward
# This is a hacky way to quickly demo AOTI's capability.
# model is still a Python object, and any mutation to its
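The build/builder.py hunk above sits in the branch that loads an ahead-of-time compiled (AOTI) shared object: the DSO cannot be loaded onto MPS, so the requested device is rewritten to cpu with a clearer message before loading. A hedged sketch of that fallback, assuming torch._export.aot_load as the loader and a simplified signature (the real builder.py works with a builder_args object and more surrounding plumbing):

```python
import torch
import torch.nn as nn


def attach_aoti_forward(model: nn.Module, dso_path: str, device: str):
    """Attach an AOT-compiled forward to `model`, falling back to CPU for MPS.

    Sketch only: mirrors the fallback added in build/builder.py, but the
    function name and argument handling here are invented for illustration.
    """
    if "mps" in device:
        print(
            "Cannot load specified DSO to MPS. Attempting to load model to CPU instead"
        )
        device = "cpu"

    # Replace the eager forward with the AOT-compiled one; the model stays a
    # regular nn.Module, so the rest of the generation pipeline is unchanged.
    model.forward = torch._export.aot_load(dso_path, device)
    return model, device
```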
2 changes: 1 addition & 1 deletion export.py
@@ -51,7 +51,7 @@ def main(args):
if output_dso_path and "mps" in builder_args.device:
print("Warning! Device MPS not supported for export. Exporting for device CPU.")
builder_args.device = "cpu"

# TODO: clean this up
# This mess is because ET does not support _weight_int4pack_mm right now
if not builder_args.gguf_path:
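export.py applies the same guard one step earlier: AOTI export itself is not supported on MPS, so when a DSO output path is requested together with an MPS device, the export is redirected to CPU. A minimal sketch of that guard, assuming torch._export.aot_compile as the AOTI entry point (the option key and the surrounding argument handling in the real export.py may differ):

```python
import torch
import torch.nn as nn


def export_dso(model: nn.Module, example_inputs: tuple, output_dso_path: str, device: str) -> str:
    """Compile `model` to an AOTI shared object, forcing CPU when MPS is requested.

    Illustrative only: names and options here are assumptions, not export.py's code.
    """
    if output_dso_path and "mps" in device:
        print("Warning! Device MPS not supported for export. Exporting for device CPU.")
        device = "cpu"

    model = model.to(device)
    example_inputs = tuple(t.to(device) for t in example_inputs)

    # aot_compile writes the compiled shared object to disk and returns its path.
    return torch._export.aot_compile(
        model,
        example_inputs,
        options={"aot_inductor.output_path": output_dso_path},
    )
```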
