From af98f76f42f6f481696a22eca17691b8e46a832f Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 18 Mar 2025 10:24:22 -0700 Subject: [PATCH 01/23] [DO NOT MERGE] 2.7 RC Test --- .jenkins/build.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 4a869d35a7..06d7cc38e6 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -22,11 +22,12 @@ sudo apt-get install -y pandoc #Install PyTorch Nightly for test. # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). -# sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata -# sudo pip3 install torch==2.6.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 +sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata +pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128 # sudo pip uninstall -y fbgemm-gpu torchrec # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 + # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm python -m spacy download de_core_news_sm From bf608af0cd7e295ad9a54d14268395a2c388b490 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 18 Mar 2025 11:24:42 -0700 Subject: [PATCH 02/23] Update .jenkins/build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 06d7cc38e6..3d6d5bbc24 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -23,7 +23,7 @@ sudo apt-get install -y pandoc # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 
to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata -pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128 +pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126 # sudo pip uninstall -y fbgemm-gpu torchrec # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 From dab616388cd75703c91f4aa6680fd27772bbead8 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 18 Mar 2025 13:11:59 -0700 Subject: [PATCH 03/23] Update .jenkins/build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 3d6d5bbc24..c66a4dc392 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -23,7 +23,7 @@ sudo apt-get install -y pandoc # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata -pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126 +pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu118 # sudo pip uninstall -y fbgemm-gpu torchrec # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 From b183f0adb97d59ca7ac127224c7b193ee158c391 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 18 Mar 2025 14:06:22 -0700 Subject: [PATCH 04/23] Update build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index c66a4dc392..cd296cc245 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -23,7 +23,7 @@ sudo apt-get install -y pandoc # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata -pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu118 +pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 # sudo pip uninstall -y fbgemm-gpu torchrec # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 From 15216e71480215bcbe53a5c1c468617f61e8c825 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 18 Mar 2025 15:09:53 -0700 Subject: [PATCH 05/23] Update build.sh --- .jenkins/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index cd296cc245..e36135edab 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -23,9 +23,9 @@ sudo apt-get install -y pandoc # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata -pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 -# sudo pip uninstall -y fbgemm-gpu torchrec -# sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 +pip3 install torch==2.7.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 +#sudo pip uninstall -y fbgemm-gpu torchrec +#sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 # Install two language tokenizers for Translation with TorchText tutorial From 20365bfcd9282f1792fed63622719eb9b5e33c92 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 18 Mar 2025 15:51:21 -0700 Subject: [PATCH 06/23] Update build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index e36135edab..477fd1d639 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -23,7 +23,7 @@ sudo apt-get install -y pandoc # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata -pip3 install torch==2.7.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 +pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 #sudo pip uninstall -y fbgemm-gpu torchrec #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From e51b6e66c8c967a78284b767baf4e4f825cc7a37 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Wed, 19 Mar 2025 12:57:22 -0700 Subject: [PATCH 07/23] Update onnxscript in requirements (#3300) --- .ci/docker/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index c032835633..434299cc23 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -36,7 +36,7 @@ datasets transformers torchmultimodal-nightly # needs to be updated to stable as soon as it's avaialable onnx -onnxscript +onnxscript>=0.2.2 onnxruntime evaluate accelerate>=0.20.1 From eff088bb500f4b5e2b0d8e0c18f1bf9cfbf08b93 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 19 Mar 2025 13:44:47 -0700 Subject: [PATCH 08/23] Update requirements.txt --- .ci/docker/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index 434299cc23..f969963988 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -28,8 +28,8 @@ tensorboard jinja2==3.1.3 pytorch-lightning torchx -torchrl==0.6.0 -tensordict==0.6.0 +torchrl==0.7.2 +tensordict==0.7.2 ax-platform>=0.4.0 nbformat>=5.9.2 datasets From dc969fdad0e1953f397b014fa07067a9be0d0f1d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 21 Mar 2025 09:05:59 -0700 Subject: [PATCH 09/23] Update --- .jenkins/validate_tutorials_built.py | 9 ++++++++- 1 file changed, 8 
insertions(+), 1 deletion(-) diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index f78ec11e1a..399ca71bcf 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -50,7 +50,14 @@ "intermediate_source/flask_rest_api_tutorial", "intermediate_source/text_to_speech_with_torchaudio", "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release. - "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed. + "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed. + "intermediate_source/mario_rl_tutorial", # reenable after 3302 is fixed + "intermediate_source/reinforcement_ppo", # reenable after 3302 is fixed + "intermediate_source/pinmem_nonblock", # reenable after 3302 is fixed + "intermediate_source/dqn_with_rnn_tutorial", # reenable after 3302 is fixed + "advanced_source/pendulum", # reenable after 3302 is fixed + "advanced_source/coding_ddpg", # reenable after 3302 is fixed + "intermediate_source/torchrec_intro_tutorial" # reenable after 3302 is fixed ] def tutorial_source_dirs() -> List[Path]: From 3cdb01ffe7fdbdd2c1cea30e9c5eb29769d82c60 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 21 Mar 2025 12:17:16 -0700 Subject: [PATCH 10/23] Update build.sh --- .jenkins/build.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 477fd1d639..069c21217f 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -22,8 +22,11 @@ sudo apt-get install -y pandoc #Install PyTorch Nightly for test. # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
-sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata +sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 +pip install git+https://github.com/pytorch/tensordict +pip install git+https://github.com/pytorch/torchrl + #sudo pip uninstall -y fbgemm-gpu torchrec #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 4b04c9ba9234dfbc12f00aade2f5fe9991854939 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 21 Mar 2025 12:17:41 -0700 Subject: [PATCH 11/23] Update .jenkins/validate_tutorials_built.py --- .jenkins/validate_tutorials_built.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index 399ca71bcf..f78ec11e1a 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -50,14 +50,7 @@ "intermediate_source/flask_rest_api_tutorial", "intermediate_source/text_to_speech_with_torchaudio", "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release. - "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed. - "intermediate_source/mario_rl_tutorial", # reenable after 3302 is fixed - "intermediate_source/reinforcement_ppo", # reenable after 3302 is fixed - "intermediate_source/pinmem_nonblock", # reenable after 3302 is fixed - "intermediate_source/dqn_with_rnn_tutorial", # reenable after 3302 is fixed - "advanced_source/pendulum", # reenable after 3302 is fixed - "advanced_source/coding_ddpg", # reenable after 3302 is fixed - "intermediate_source/torchrec_intro_tutorial" # reenable after 3302 is fixed + "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed. 
] def tutorial_source_dirs() -> List[Path]: From 4aae3b1a35beafcb76eeaa78f3c3da3b627188f7 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 21 Mar 2025 14:16:22 -0700 Subject: [PATCH 12/23] Update build.sh --- .jenkins/build.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 069c21217f..49e0668640 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,9 +24,8 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 -pip install git+https://github.com/pytorch/tensordict -pip install git+https://github.com/pytorch/torchrl - +pip install git+https://github.com/pytorch/tensordict.git@main --depth=1 +pip install git+https://github.com/pytorch/torchrl.git@main --depth=1 #sudo pip uninstall -y fbgemm-gpu torchrec #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 327b32b62a5541b6c08dcb0aa764a95a6917ee4d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 21 Mar 2025 17:10:36 -0700 Subject: [PATCH 13/23] Update .jenkins/build.sh --- .jenkins/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 49e0668640..70f841af8a 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,8 +24,8 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 -pip install git+https://github.com/pytorch/tensordict.git@main --depth=1 -pip install git+https://github.com/pytorch/torchrl.git@main --depth=1 +pip install git+https://github.com/pytorch/tensordict +pip install git+https://github.com/pytorch/torchrl #sudo pip uninstall -y fbgemm-gpu torchrec #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 0b69e411c6c8d03e602c3d39fd1346a2963dd4f5 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Mar 2025 09:51:43 -0700 Subject: [PATCH 14/23] Update build.sh --- .jenkins/build.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 70f841af8a..7208376475 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,8 +24,6 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 -pip install git+https://github.com/pytorch/tensordict -pip install git+https://github.com/pytorch/torchrl #sudo pip uninstall -y fbgemm-gpu torchrec #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 4221d54ef82b45c0d1659e30a391a0e87fcb6106 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Mar 2025 10:01:57 -0700 Subject: [PATCH 15/23] Apply suggestions from code review --- .jenkins/build.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 7208376475..af795d3dad 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,7 +24,10 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 -#sudo pip uninstall -y fbgemm-gpu torchrec +sudo pip uninstall -y fbgemm-gpu torchrec +sudo pip3 install -y https://download.pytorch.org/whl/nightly/fbgemm-gpu/ +sudo pip3 install -y https://download.pytorch.org/whl/nightly/torchrec/ + #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 68e58aea1bc4b857bb976038c87a79637cc99cc9 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Mar 2025 11:05:13 -0700 Subject: [PATCH 16/23] Update build.sh --- .jenkins/build.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index af795d3dad..e40dd82ecc 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -25,9 +25,7 @@ sudo apt-get install -y pandoc sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 sudo pip uninstall -y fbgemm-gpu torchrec -sudo pip3 install -y https://download.pytorch.org/whl/nightly/fbgemm-gpu/ -sudo pip3 install -y https://download.pytorch.org/whl/nightly/torchrec/ - +sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/ #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 69802e070eec30e9de1666ceb079704b5aa8cb0e Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Mar 2025 13:34:28 -0700 Subject: [PATCH 17/23] Update requirements.txt --- .ci/docker/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index f969963988..0e95c62c6b 
100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -69,5 +69,5 @@ pycocotools semilearn==0.3.2 torchao==0.5.0 segment_anything==1.0 -torchrec==1.0.0; platform_system == "Linux" +torchrec==1.1.0; platform_system == "Linux" fbgemm-gpu==1.1.0; platform_system == "Linux" From d463fd49d0aaa85053cf39b64d57c955adc3142a Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Mar 2025 13:35:12 -0700 Subject: [PATCH 18/23] Update .jenkins/build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index e40dd82ecc..f8916478ca 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,7 +24,7 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 -sudo pip uninstall -y fbgemm-gpu torchrec +sudo pip uninstall -y fbgemm-gpu sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/ #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 81efd5fc778030111f5bcf61760fb1c83bd8789e Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Mar 2025 14:36:37 -0700 Subject: [PATCH 19/23] Update .jenkins/build.sh --- .jenkins/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index f8916478ca..ccd3683779 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -26,6 +26,8 @@ sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl t pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 sudo pip uninstall -y fbgemm-gpu sudo pip3 install 
--pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/ +pip install tensordict-nightly +pip install torchrl-nightly #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From f7d8e7a565f69c0cf512d0e1421ac289282df1f3 Mon Sep 17 00:00:00 2001 From: Bin Bao Date: Tue, 25 Mar 2025 17:35:34 -0400 Subject: [PATCH 20/23] Fix the AOTI example (#3306) Summary: The compiled model run takes the same input as Eager. No need to explicitly compose args as a tuple. --- intermediate_source/torch_export_tutorial.py | 2 +- recipes_source/torch_export_aoti_python.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/intermediate_source/torch_export_tutorial.py b/intermediate_source/torch_export_tutorial.py index 3ca6d09a52..20b1b4023e 100644 --- a/intermediate_source/torch_export_tutorial.py +++ b/intermediate_source/torch_export_tutorial.py @@ -995,7 +995,7 @@ def forward(self, x): # with torch.no_grad(): # pt2_path = torch._inductor.aoti_compile_and_package(ep) # -# # Load and run the .so file in Python. +# # Load and run the .pt2 file in Python. 
# # To load and run it in a C++ environment, see: # # https://pytorch.org/docs/main/torch.compiler_aot_inductor.html # aoti_compiled = torch._inductor.aoti_load_package(pt2_path) diff --git a/recipes_source/torch_export_aoti_python.py b/recipes_source/torch_export_aoti_python.py index c0cbb7e280..ff311f071e 100644 --- a/recipes_source/torch_export_aoti_python.py +++ b/recipes_source/torch_export_aoti_python.py @@ -176,7 +176,7 @@ model_path = os.path.join(os.getcwd(), "resnet18.pt2") compiled_model = torch._inductor.aoti_load_package(model_path) -example_inputs = (torch.randn(2, 3, 224, 224, device=device),) +example_inputs = torch.randn(2, 3, 224, 224, device=device) with torch.inference_mode(): output = compiled_model(example_inputs) @@ -238,11 +238,11 @@ def timed(fn): torch._dynamo.reset() -model = torch._inductor.aoti_load_package(model_path) -example_inputs = (torch.randn(1, 3, 224, 224, device=device),) +compiled_model = torch._inductor.aoti_load_package(model_path) +example_inputs = torch.randn(1, 3, 224, 224, device=device) with torch.inference_mode(): - _, time_taken = timed(lambda: model(example_inputs)) + _, time_taken = timed(lambda: compiled_model(example_inputs)) print(f"Time taken for first inference for AOTInductor is {time_taken:.2f} ms") From f58cf379931b094541e9c0955fe9e0b33b49f182 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 25 Mar 2025 19:36:05 -0700 Subject: [PATCH 21/23] Update build.sh --- .jenkins/build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index ccd3683779..7705a429cd 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,10 +24,10 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 -sudo pip uninstall -y fbgemm-gpu -sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/ -pip install tensordict-nightly -pip install torchrl-nightly +#sudo pip uninstall -y fbgemm-gpu +#sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/ +#pip install tensordict-nightly +#pip install torchrl-nightly #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 From 6e3f90a613028768708ac22c0c1b2eaf373fe9f2 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 25 Mar 2025 19:45:54 -0700 Subject: [PATCH 22/23] Disable rl tutorials again --- .jenkins/validate_tutorials_built.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index f78ec11e1a..399ca71bcf 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -50,7 +50,14 @@ "intermediate_source/flask_rest_api_tutorial", "intermediate_source/text_to_speech_with_torchaudio", "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release. - "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed. + "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed. 
+    "intermediate_source/mario_rl_tutorial", # reenable after 3302 is fixed
+    "intermediate_source/reinforcement_ppo", # reenable after 3302 is fixed
+    "intermediate_source/pinmem_nonblock", # reenable after 3302 is fixed
+    "intermediate_source/dqn_with_rnn_tutorial", # reenable after 3302 is fixed
+    "advanced_source/pendulum", # reenable after 3302 is fixed
+    "advanced_source/coding_ddpg", # reenable after 3302 is fixed
+    "intermediate_source/torchrec_intro_tutorial" # reenable after 3302 is fixed
 ]
 
 def tutorial_source_dirs() -> List[Path]:

From 95d518129c8987a14a187aa761d1e8e6006e5a66 Mon Sep 17 00:00:00 2001
From: Michael Lazos
Date: Tue, 15 Apr 2025 13:40:40 -0700
Subject: [PATCH 23/23] First commit

---
 recipes_source/foreach_map.py    | 225 +++++++++++++++++++++++++++++++
 recipes_source/recipes_index.rst |   9 ++
 2 files changed, 234 insertions(+)
 create mode 100644 recipes_source/foreach_map.py

diff --git a/recipes_source/foreach_map.py b/recipes_source/foreach_map.py
new file mode 100644
index 0000000000..655c0c5aa2
--- /dev/null
+++ b/recipes_source/foreach_map.py
@@ -0,0 +1,225 @@
+"""
+(beta) Explicit horizontal fusion with foreach_map and torch.compile
+======================================================================
+
+**Author:** `Michael Lazos <https://github.com/mlazos>`_
+"""
+
+#########################################################
+# Horizontal fusion is a key optimization in ML compilers. In eager,
+# this is typically expressed using the torch._foreach* ops, which parallelize
+# operations across a list of tensors. However, supporting all possible permutations
+# of arguments is quite difficult (e.g. mixtures of scalars and lists). ``foreach_map``
+# allows conversion of any pointwise op in ``torch`` to a horizontally fused foreach
+# variant. In this tutorial, we will demonstrate how to implement the Adam optimizer
+# with ``foreach_map`` to generate a fully fused kernel.
+#
+#
+# .. note::
+#
+#    This tutorial requires PyTorch 2.7.0 or later.
+
+#####################################################################
+# Model Setup
+# ~~~~~~~~~~~~~~~~~~~~~
+# For this example, we'll use a simple sequence of linear layers.
+# We instantiate a second model with identical weights so that the two
+# optimizer implementations can be compared step for step.
+#
+import sys
+
+import torch
+
+# exit cleanly if we are on a device that doesn't support ``torch.compile``
+if not torch.cuda.is_available() or torch.cuda.get_device_capability() < (7, 0):
+    print("Exiting because torch.compile is not supported on this device.")
+    sys.exit(0)
+
+# Create simple model
+model = torch.nn.Sequential(
+    *[torch.nn.Linear(1024, 1024, False, device="cuda") for _ in range(10)]
+)
+model_copy = torch.nn.Sequential(
+    *[torch.nn.Linear(1024, 1024, False, device="cuda") for _ in range(10)]
+)
+# start both models from the same weights so their updates are comparable
+model_copy.load_state_dict(model.state_dict())
+example_input = torch.rand(1024, device="cuda")
+
+# run forward pass
+output = model(example_input)
+output_copy = model_copy(example_input)
+
+# run backward to populate the grads for our optimizer below
+output.sum().backward()
+output_copy.sum().backward()
+
+#####################################################################
+# Helper functions for foreach_map implementation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In this section, we'll begin our implementation of the Adam optimizer.
+#
+from torch._higher_order_ops.foreach_map import foreach_map
+
+
+def get_inputs(optim):
+    """Extract the per-parameter optimizer states from a ``torch.optim.Adam`` instance.
+
+    Returns ``(steps, params, exp_avgs, exp_avg_sqs)`` as parallel lists. The
+    state must already exist, so call ``optim.step()`` once before this helper.
+    """
+    steps = []
+    params = []
+    exp_avgs = []
+    exp_avg_sqs = []
+    for group in optim.param_groups:
+        for p in group["params"]:
+            state = optim.state[p]
+            params.append(p)
+            exp_avgs.append(state["exp_avg"])
+            exp_avg_sqs.append(state["exp_avg_sq"])
+            steps.append(state["step"])
+
+    return steps, params, exp_avgs, exp_avg_sqs
+
+
+# Functions to update the different optimizer states
+def apply_weight_decay(grad, param, weight_decay):
+    """Return the gradient with L2 weight decay folded in."""
+    return grad.add(param, alpha=weight_decay)
+
+
+def update_exp_avg_sq(exp_avg_sq, grad, beta2):
+    """Return the updated running average of the squared gradient."""
+    return exp_avg_sq.mul(beta2).addcmul(grad, grad, value=1 - beta2)
+
+
+def update_param(param, step, exp_avg, exp_avg_sq, beta1, beta2, lr, eps):
+    """Return the parameter after one bias-corrected Adam update."""
+    bias_correction1 = 1 - torch.pow(beta1, step)
+    bias_correction2 = (1 - torch.pow(beta2, step)).sqrt()
+    # the step size is negated so that the update below can be a plain add
+    step_size = (lr / bias_correction1).neg()
+    denom = (exp_avg_sq.sqrt() / (bias_correction2 * step_size)).add(eps / step_size)
+    return torch.add(param, torch.div(exp_avg, denom))
+
+
+# Our full Adam implementation
+def foreach_map_adam(
+    steps,
+    params,
+    exp_avgs,
+    exp_avg_sqs,
+    weight_decay=0,
+    beta1=0.9,
+    beta2=0.999,
+    lr=1e-3,
+    eps=1e-8,
+):
+    """Run one Adam step over all parameters using ``foreach_map``.
+
+    ``steps``, ``params``, ``exp_avgs`` and ``exp_avg_sqs`` are parallel lists
+    (see ``get_inputs``) and are updated in place.
+    """
+    with torch.no_grad():
+        grads = [param.grad for param in params]
+        # update step
+        updated_steps = foreach_map(lambda x: x + 1, steps)
+        torch._foreach_copy_(steps, updated_steps)
+
+        if weight_decay != 0:
+            # keep the result: ``foreach_map`` returns new tensors instead of mutating
+            grads = foreach_map(apply_weight_decay, grads, params, weight_decay)
+
+        # Higher-order operators (HOPs) cannot have multiple outputs at the moment
+        # need to call foreach_map once for each output
+        exp_avgs_updated = foreach_map(torch.lerp, exp_avgs, grads, 1 - beta1)
+        exp_avgs_sq_updated = foreach_map(update_exp_avg_sq, exp_avg_sqs, grads, beta2)
+        params_updated = foreach_map(
+            update_param,
+            params,
+            steps,
+            exp_avgs_updated,
+            exp_avgs_sq_updated,
+            beta1,
+            beta2,
+            lr,
+            eps,
+        )
+        # Higher-order operators (HOPs) don't support input mutation today
+        # so manually update the states in-place
+        torch._foreach_copy_(exp_avgs, exp_avgs_updated)
+        torch._foreach_copy_(exp_avg_sqs, exp_avgs_sq_updated)
+        torch._foreach_copy_(params, params_updated)
+
+#####################################################################
+# Setting up and running the compiled kernel
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In this section, we'll run our Adam optimizer
+# and compare the results
+#
+# .. note::
+#
+#    ``torch.compile`` is only supported on CUDA devices that have a compute capability of 7.0 or higher.
+
+# use the default learning rate of ``foreach_map_adam`` so both optimizers match
+opt_eager = torch.optim.Adam(model.parameters(), lr=torch.tensor(1e-3))
+opt_eager_copy = torch.optim.Adam(model_copy.parameters(), lr=torch.tensor(1e-3))
+
+# warm up the optimizer state dict
+opt_eager.step()
+opt_eager_copy.step()
+
+inputs = get_inputs(opt_eager_copy)
+compiled_adam = torch.compile(foreach_map_adam)
+
+# optionally view the output code
+torch._logging.set_logs(output_code=True)
+
+# Warmup runs to compile the function
+for _ in range(5):
+    opt_eager.step()
+    compiled_adam(*inputs)
+
+# both optimizers started from the same weights, gradients and learning rate,
+# so the parameters must still agree after the warmup steps
+for eager_p, compile_p in zip(opt_eager.param_groups[0]["params"], opt_eager_copy.param_groups[0]["params"]):
+    torch.testing.assert_close(compile_p, eager_p, rtol=1e-4, atol=1e-5)
+
+# Benchmark performance
+
+# Let's define a helpful benchmarking function:
+import torch.utils.benchmark as benchmark
+
+def benchmark_torch_function_in_microseconds(f, *args, **kwargs):
+    """Return the mean runtime of ``f(*args, **kwargs)`` in microseconds."""
+    t0 = benchmark.Timer(
+        stmt="f(*args, **kwargs)", globals={"args": args, "kwargs": kwargs, "f": f}
+    )
+    return t0.blocked_autorange().mean * 1e6
+
+eager_runtime = benchmark_torch_function_in_microseconds(opt_eager.step)
+compiled_runtime = benchmark_torch_function_in_microseconds(lambda: compiled_adam(*inputs))
+
+assert eager_runtime > compiled_runtime
+
+print(f"eager runtime: {eager_runtime}us")
+print(f"compiled runtime: {compiled_runtime}us")
+
+
+
+######################################################################
+# Conclusion
+# ~~~~~~~~~~
+# In this tutorial, we successfully implemented a custom fully-fused Adam optimizer using foreach_map.
+# By leveraging the power of foreach_map and torch.compile, we were able to create an optimized version of the Adam
+# optimizer that can be used in various machine learning applications. This tutorial provides a comprehensive guide
+# on how to use foreach_map and torch.compile to optimize machine learning models, and serves as a
+# valuable resource for developers looking to improve the performance of their models with horizontal fusion.
+#
+# See also:
+#
+# * `Compiled optimizer tutorial <https://pytorch.org/tutorials/recipes/compiling_optimizer.html>`__ - an intro into the compiled optimizer.
+# * `Compiling the optimizer with PT2 <https://dev-discuss.pytorch.org/t/compiling-the-optimizer-with-pt2/1669>`__ - deeper technical details on the compiled optimizer.
diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst
index f136c4b9c6..1ad3b0d97a 100644
--- a/recipes_source/recipes_index.rst
+++ b/recipes_source/recipes_index.rst
@@ -335,6 +335,15 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu
    :link: ../recipes/compiling_optimizer_lr_scheduler.html
    :tags: Model-Optimization
 
+.. (beta) Explicit horizontal fusion with foreach_map and torch.compile
+
+.. customcarditem::
+   :header: (beta) Explicit horizontal fusion with foreach_map and torch.compile
+   :card_description: Horizontally fuse pointwise ops with torch.compile
+   :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png
+   :link: ../recipes/foreach_map.html
+   :tags: Model-Optimization
+
 .. Using User-Defined Triton Kernels with ``torch.compile``
 
 .. customcarditem::