From af98f76f42f6f481696a22eca17691b8e46a832f Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 18 Mar 2025 10:24:22 -0700
Subject: [PATCH 01/23] [DO NOT MERGE] 2.7 RC Test

---
 .jenkins/build.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 4a869d35a7..06d7cc38e6 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -22,11 +22,12 @@ sudo apt-get install -y pandoc
 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
-# sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
-# sudo pip3 install torch==2.6.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
+sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
+pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
 # sudo pip uninstall -y fbgemm-gpu torchrec
 # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
 
+
 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm
 python -m spacy download de_core_news_sm

From bf608af0cd7e295ad9a54d14268395a2c388b490 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 18 Mar 2025 11:24:42 -0700
Subject: [PATCH 02/23] Update .jenkins/build.sh

---
 .jenkins/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 06d7cc38e6..3d6d5bbc24 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -23,7 +23,7 @@ sudo apt-get install -y pandoc
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
-pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
+pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
 # sudo pip uninstall -y fbgemm-gpu torchrec
 # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
 

From dab616388cd75703c91f4aa6680fd27772bbead8 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 18 Mar 2025 13:11:59 -0700
Subject: [PATCH 03/23] Update .jenkins/build.sh

---
 .jenkins/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 3d6d5bbc24..c66a4dc392 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -23,7 +23,7 @@ sudo apt-get install -y pandoc
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
-pip3 install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
+pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu118
 # sudo pip uninstall -y fbgemm-gpu torchrec
 # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
 

From b183f0adb97d59ca7ac127224c7b193ee158c391 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 18 Mar 2025 14:06:22 -0700
Subject: [PATCH 04/23] Update build.sh

---
 .jenkins/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index c66a4dc392..cd296cc245 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -23,7 +23,7 @@ sudo apt-get install -y pandoc
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
-pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu118
+pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 # sudo pip uninstall -y fbgemm-gpu torchrec
 # sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
 

From 15216e71480215bcbe53a5c1c468617f61e8c825 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 18 Mar 2025 15:09:53 -0700
Subject: [PATCH 05/23] Update build.sh

---
 .jenkins/build.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index cd296cc245..e36135edab 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -23,9 +23,9 @@ sudo apt-get install -y pandoc
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
-pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
-# sudo pip uninstall -y fbgemm-gpu torchrec
-# sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
+pip3 install torch==2.7.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
+#sudo pip uninstall -y fbgemm-gpu torchrec
+#sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 
 
 # Install two language tokenizers for Translation with TorchText tutorial

From 20365bfcd9282f1792fed63622719eb9b5e33c92 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 18 Mar 2025 15:51:21 -0700
Subject: [PATCH 06/23] Update build.sh

---
 .jenkins/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index e36135edab..477fd1d639 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -23,7 +23,7 @@ sudo apt-get install -y pandoc
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
-pip3 install torch==2.7.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
+pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 #sudo pip uninstall -y fbgemm-gpu torchrec
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 

From e51b6e66c8c967a78284b767baf4e4f825cc7a37 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Wed, 19 Mar 2025 12:57:22 -0700
Subject: [PATCH 07/23] Update onnxscript in requirements (#3300)

---
 .ci/docker/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt
index c032835633..434299cc23 100644
--- a/.ci/docker/requirements.txt
+++ b/.ci/docker/requirements.txt
@@ -36,7 +36,7 @@ datasets
 transformers
 torchmultimodal-nightly # needs to be updated to stable as soon as it's avaialable
 onnx
-onnxscript
+onnxscript>=0.2.2
 onnxruntime
 evaluate
 accelerate>=0.20.1

From eff088bb500f4b5e2b0d8e0c18f1bf9cfbf08b93 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Wed, 19 Mar 2025 13:44:47 -0700
Subject: [PATCH 08/23] Update requirements.txt

---
 .ci/docker/requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt
index 434299cc23..f969963988 100644
--- a/.ci/docker/requirements.txt
+++ b/.ci/docker/requirements.txt
@@ -28,8 +28,8 @@ tensorboard
 jinja2==3.1.3
 pytorch-lightning
 torchx
-torchrl==0.6.0
-tensordict==0.6.0
+torchrl==0.7.2
+tensordict==0.7.2
 ax-platform>=0.4.0
 nbformat>=5.9.2
 datasets

From dc969fdad0e1953f397b014fa07067a9be0d0f1d Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Fri, 21 Mar 2025 09:05:59 -0700
Subject: [PATCH 09/23] Update

---
 .jenkins/validate_tutorials_built.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
index f78ec11e1a..399ca71bcf 100644
--- a/.jenkins/validate_tutorials_built.py
+++ b/.jenkins/validate_tutorials_built.py
@@ -50,7 +50,14 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
-    "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed.
+    "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed.
+    "intermediate_source/mario_rl_tutorial", # reenable after 3302 is fixed
+    "intermediate_source/reinforcement_ppo", # reenable after 3302 is fixed
+    "intermediate_source/pinmem_nonblock", # reenable after 3302 is fixed
+    "intermediate_source/dqn_with_rnn_tutorial", # reenable after 3302 is fixed
+    "advanced_source/pendulum", # reenable after 3302 is fixed
+    "advanced_source/coding_ddpg", # reenable after 3302 is fixed
+    "intermediate_source/torchrec_intro_tutorial" # reenable after 3302 is fixed
 ]
 
 def tutorial_source_dirs() -> List[Path]:

From 3cdb01ffe7fdbdd2c1cea30e9c5eb29769d82c60 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Fri, 21 Mar 2025 12:17:16 -0700
Subject: [PATCH 10/23] Update build.sh

---
 .jenkins/build.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 477fd1d639..069c21217f 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -22,8 +22,11 @@ sudo apt-get install -y pandoc
 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
-sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
+sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
+pip install git+https://github.com/pytorch/tensordict
+pip install git+https://github.com/pytorch/torchrl
+
 #sudo pip uninstall -y fbgemm-gpu torchrec
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 

From 4b04c9ba9234dfbc12f00aade2f5fe9991854939 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Fri, 21 Mar 2025 12:17:41 -0700
Subject: [PATCH 11/23] Update .jenkins/validate_tutorials_built.py

---
 .jenkins/validate_tutorials_built.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
index 399ca71bcf..f78ec11e1a 100644
--- a/.jenkins/validate_tutorials_built.py
+++ b/.jenkins/validate_tutorials_built.py
@@ -50,14 +50,7 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
-    "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed.
-    "intermediate_source/mario_rl_tutorial", # reenable after 3302 is fixed
-    "intermediate_source/reinforcement_ppo", # reenable after 3302 is fixed
-    "intermediate_source/pinmem_nonblock", # reenable after 3302 is fixed
-    "intermediate_source/dqn_with_rnn_tutorial", # reenable after 3302 is fixed
-    "advanced_source/pendulum", # reenable after 3302 is fixed
-    "advanced_source/coding_ddpg", # reenable after 3302 is fixed
-    "intermediate_source/torchrec_intro_tutorial" # reenable after 3302 is fixed
+    "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed.
 ]
 
 def tutorial_source_dirs() -> List[Path]:

From 4aae3b1a35beafcb76eeaa78f3c3da3b627188f7 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Fri, 21 Mar 2025 14:16:22 -0700
Subject: [PATCH 12/23] Update build.sh

---
 .jenkins/build.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 069c21217f..49e0668640 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -24,9 +24,8 @@ sudo apt-get install -y pandoc
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
-pip install git+https://github.com/pytorch/tensordict
-pip install git+https://github.com/pytorch/torchrl
-
+pip install git+https://github.com/pytorch/tensordict.git@main --depth=1
+pip install git+https://github.com/pytorch/torchrl.git@main --depth=1
 #sudo pip uninstall -y fbgemm-gpu torchrec
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 

From 327b32b62a5541b6c08dcb0aa764a95a6917ee4d Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Fri, 21 Mar 2025 17:10:36 -0700
Subject: [PATCH 13/23] Update .jenkins/build.sh

---
 .jenkins/build.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 49e0668640..70f841af8a 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -24,8 +24,8 @@ sudo apt-get install -y pandoc
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
-pip install git+https://github.com/pytorch/tensordict.git@main --depth=1
-pip install git+https://github.com/pytorch/torchrl.git@main --depth=1
+pip install git+https://github.com/pytorch/tensordict
+pip install git+https://github.com/pytorch/torchrl
 #sudo pip uninstall -y fbgemm-gpu torchrec
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 

From 0b69e411c6c8d03e602c3d39fd1346a2963dd4f5 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Mon, 24 Mar 2025 09:51:43 -0700
Subject: [PATCH 14/23] Update build.sh

---
 .jenkins/build.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 70f841af8a..7208376475 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -24,8 +24,6 @@ sudo apt-get install -y pandoc
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
-pip install git+https://github.com/pytorch/tensordict
-pip install git+https://github.com/pytorch/torchrl
 #sudo pip uninstall -y fbgemm-gpu torchrec
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 

From 4221d54ef82b45c0d1659e30a391a0e87fcb6106 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Mon, 24 Mar 2025 10:01:57 -0700
Subject: [PATCH 15/23] Apply suggestions from code review

---
 .jenkins/build.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 7208376475..af795d3dad 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -24,7 +24,10 @@ sudo apt-get install -y pandoc
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
-#sudo pip uninstall -y fbgemm-gpu torchrec
+sudo pip uninstall -y fbgemm-gpu torchrec
+sudo pip3 install -y https://download.pytorch.org/whl/nightly/fbgemm-gpu/
+sudo pip3 install -y https://download.pytorch.org/whl/nightly/torchrec/
+
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 
 

From 68e58aea1bc4b857bb976038c87a79637cc99cc9 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Mon, 24 Mar 2025 11:05:13 -0700
Subject: [PATCH 16/23] Update build.sh

---
 .jenkins/build.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index af795d3dad..e40dd82ecc 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -25,9 +25,7 @@ sudo apt-get install -y pandoc
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 sudo pip uninstall -y fbgemm-gpu torchrec
-sudo pip3 install -y https://download.pytorch.org/whl/nightly/fbgemm-gpu/
-sudo pip3 install -y https://download.pytorch.org/whl/nightly/torchrec/
-
+sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 
 

From 69802e070eec30e9de1666ceb079704b5aa8cb0e Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Mon, 24 Mar 2025 13:34:28 -0700
Subject: [PATCH 17/23] Update requirements.txt

---
 .ci/docker/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt
index f969963988..0e95c62c6b 100644
--- a/.ci/docker/requirements.txt
+++ b/.ci/docker/requirements.txt
@@ -69,5 +69,5 @@ pycocotools
 semilearn==0.3.2
 torchao==0.5.0
 segment_anything==1.0
-torchrec==1.0.0; platform_system == "Linux"
+torchrec==1.1.0; platform_system == "Linux"
 fbgemm-gpu==1.1.0; platform_system == "Linux"

From d463fd49d0aaa85053cf39b64d57c955adc3142a Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Mon, 24 Mar 2025 13:35:12 -0700
Subject: [PATCH 18/23] Update .jenkins/build.sh

---
 .jenkins/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index e40dd82ecc..f8916478ca 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -24,7 +24,7 @@ sudo apt-get install -y pandoc
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
-sudo pip uninstall -y fbgemm-gpu torchrec
+sudo pip uninstall -y fbgemm-gpu
 sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 

From 81efd5fc778030111f5bcf61760fb1c83bd8789e Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Mon, 24 Mar 2025 14:36:37 -0700
Subject: [PATCH 19/23] Update .jenkins/build.sh

---
 .jenkins/build.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index f8916478ca..ccd3683779 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -26,6 +26,8 @@ sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl t
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 sudo pip uninstall -y fbgemm-gpu
 sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/
+pip install tensordict-nightly
+pip install torchrl-nightly
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 
 

From f7d8e7a565f69c0cf512d0e1421ac289282df1f3 Mon Sep 17 00:00:00 2001
From: Bin Bao <binbao@meta.com>
Date: Tue, 25 Mar 2025 17:35:34 -0400
Subject: [PATCH 20/23] Fix the AOTI example (#3306)

Summary: The compiled model run takes the same input as Eager. No need to explicitly compose args as a tuple.
---
 intermediate_source/torch_export_tutorial.py | 2 +-
 recipes_source/torch_export_aoti_python.py   | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/intermediate_source/torch_export_tutorial.py b/intermediate_source/torch_export_tutorial.py
index 3ca6d09a52..20b1b4023e 100644
--- a/intermediate_source/torch_export_tutorial.py
+++ b/intermediate_source/torch_export_tutorial.py
@@ -995,7 +995,7 @@ def forward(self, x):
 #    with torch.no_grad():
 #        pt2_path = torch._inductor.aoti_compile_and_package(ep)
 #
-#    # Load and run the .so file in Python.
+#    # Load and run the .pt2 file in Python.
 #    # To load and run it in a C++ environment, see:
 #    # https://pytorch.org/docs/main/torch.compiler_aot_inductor.html
 #    aoti_compiled = torch._inductor.aoti_load_package(pt2_path)
diff --git a/recipes_source/torch_export_aoti_python.py b/recipes_source/torch_export_aoti_python.py
index c0cbb7e280..ff311f071e 100644
--- a/recipes_source/torch_export_aoti_python.py
+++ b/recipes_source/torch_export_aoti_python.py
@@ -176,7 +176,7 @@
 model_path = os.path.join(os.getcwd(), "resnet18.pt2")
 
 compiled_model = torch._inductor.aoti_load_package(model_path)
-example_inputs = (torch.randn(2, 3, 224, 224, device=device),)
+example_inputs = torch.randn(2, 3, 224, 224, device=device)
 
 with torch.inference_mode():
     output = compiled_model(example_inputs)
@@ -238,11 +238,11 @@ def timed(fn):
 
 torch._dynamo.reset()
 
-model = torch._inductor.aoti_load_package(model_path)
-example_inputs = (torch.randn(1, 3, 224, 224, device=device),)
+compiled_model = torch._inductor.aoti_load_package(model_path)
+example_inputs = torch.randn(1, 3, 224, 224, device=device)
 
 with torch.inference_mode():
-    _, time_taken = timed(lambda: model(example_inputs))
+    _, time_taken = timed(lambda: compiled_model(example_inputs))
     print(f"Time taken for first inference for AOTInductor is {time_taken:.2f} ms")
 
 

From f58cf379931b094541e9c0955fe9e0b33b49f182 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 25 Mar 2025 19:36:05 -0700
Subject: [PATCH 21/23] Update build.sh

---
 .jenkins/build.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index ccd3683779..7705a429cd 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -24,10 +24,10 @@ sudo apt-get install -y pandoc
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict
 pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
-sudo pip uninstall -y fbgemm-gpu
-sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/
-pip install tensordict-nightly
-pip install torchrl-nightly
+#sudo pip uninstall -y fbgemm-gpu
+#sudo pip3 install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu126/
+#pip install tensordict-nightly
+#pip install torchrl-nightly
 #sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126
 
 

From 6e3f90a613028768708ac22c0c1b2eaf373fe9f2 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu <svekars@meta.com>
Date: Tue, 25 Mar 2025 19:45:54 -0700
Subject: [PATCH 22/23] Disable rl tutorials again

---
 .jenkins/validate_tutorials_built.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
index f78ec11e1a..399ca71bcf 100644
--- a/.jenkins/validate_tutorials_built.py
+++ b/.jenkins/validate_tutorials_built.py
@@ -50,7 +50,14 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
-    "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed.
+    "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed.
+    "intermediate_source/mario_rl_tutorial", # reenable after 3302 is fixed
+    "intermediate_source/reinforcement_ppo", # reenable after 3302 is fixed
+    "intermediate_source/pinmem_nonblock", # reenable after 3302 is fixed
+    "intermediate_source/dqn_with_rnn_tutorial", # reenable after 3302 is fixed
+    "advanced_source/pendulum", # reenable after 3302 is fixed
+    "advanced_source/coding_ddpg", # reenable after 3302 is fixed
+    "intermediate_source/torchrec_intro_tutorial" # reenable after 3302 is fixed
 ]
 
 def tutorial_source_dirs() -> List[Path]:

From 95d518129c8987a14a187aa761d1e8e6006e5a66 Mon Sep 17 00:00:00 2001
From: Michael Lazos <mlazos@meta.com>
Date: Tue, 15 Apr 2025 13:40:40 -0700
Subject: [PATCH 23/23] First commit

---
 recipes_source/foreach_map.py    | 198 +++++++++++++++++++++++++++++++
 recipes_source/recipes_index.rst |   9 ++
 2 files changed, 207 insertions(+)
 create mode 100644 recipes_source/foreach_map.py

diff --git a/recipes_source/foreach_map.py b/recipes_source/foreach_map.py
new file mode 100644
index 0000000000..655c0c5aa2
--- /dev/null
+++ b/recipes_source/foreach_map.py
@@ -0,0 +1,198 @@
+"""
+(beta) Explicit horizontal fusion with foreach_map and torch.compile
+====================================================================
+
+**Author:** `Michael Lazos <https://github.com/mlazos>`_
+"""
+
+#########################################################
+#  Horizontal fusion is a key optimization in ML compilers. In eager,
+#  this is typically expressed using the ``torch._foreach*`` ops, which parallelize
+#  operations across a list of tensors. However, supporting all possible permutations
+#  of arguments is quite difficult (e.g. mixtures of scalars and lists). ``foreach_map``
+#  allows conversion of any pointwise op in ``torch`` to a horizontally fused foreach
+#  variant. In this tutorial, we will demonstrate how to implement the Adam optimizer
+#  with ``foreach_map`` to generate a fully fused kernel.  
+# 
+#
+# .. note::
+#
+#    This tutorial requires PyTorch 2.7.0 or later.
+
+#####################################################################
+# Model Setup
+# ~~~~~~~~~~~~~~~~~~~~~
+# For this example, we'll use a simple sequence of linear layers.
+# We instantiate an independent copy to compare the two optimizer implementations.
+#
+import torch
+
+# exit cleanly if we are on a device that doesn't support ``torch.compile``
+if torch.cuda.get_device_capability() < (7, 0):
+    print("Exiting because torch.compile is not supported on this device.")
+    import sys
+    sys.exit(0)
+
+# Create simple model
+model = torch.nn.Sequential(
+    *[torch.nn.Linear(1024, 1024, False, device="cuda") for _ in range(10)]
+)
+model_copy = torch.nn.Sequential(
+    *[torch.nn.Linear(1024, 1024, False, device="cuda") for _ in range(10)]
+)
+input = torch.rand(1024, device="cuda")
+
+# run forward pass
+output = model(input)
+output_copy = model_copy(input)
+
+# run backward to populate the grads for our optimizer below
+output.sum().backward()
+output_copy.sum().backward()
+
+#####################################################################
+# Helper functions for foreach_map implementation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In this section, we'll begin our implementation of the Adam optimizer.
+#
+from torch._higher_order_ops.foreach_map import foreach_map
+
+# Helper function to extract optimizer states from a torch.optim.Adam instance
+def get_inputs(optim):
+    """Return per-parameter ``(steps, params, exp_avgs, exp_avg_sqs)`` lists read from ``optim``."""
+    steps = []
+    params = []
+    exp_avgs = []
+    exp_avg_sqs = []
+    for group in optim.param_groups:
+        for p in group["params"]:
+            # Gradients are not collected here: foreach_map_adam reads them from ``param.grad``.
+            state = optim.state[p]
+            params.append(p)
+            exp_avgs.append(state["exp_avg"])
+            exp_avg_sqs.append(state["exp_avg_sq"])
+            steps.append(state["step"])
+
+    return steps, params, exp_avgs, exp_avg_sqs
+
+
+# Functions to update the different optimizer states
+def update_exp_avg_sq(exp_avg_sq, grad, beta2):
+    return exp_avg_sq.mul(beta2).addcmul(grad, grad, value=1 - beta2)
+
+def update_param(param, step, exp_avg, exp_avg_sq, beta1, beta2, lr, eps):
+    bias_correction1 = 1 - torch.pow(beta1, step)
+    bias_correction2 = (1 - torch.pow(beta2, step)).sqrt()
+    step_size = (lr / bias_correction1).neg()
+    denom = (exp_avg_sq.sqrt() / (bias_correction2 * step_size)).add(eps / step_size)
+    return torch.add(param, torch.div(exp_avg, denom))
+
+# Our full Adam implementation
+def foreach_map_adam(
+    steps,
+    params,
+    exp_avgs,
+    exp_avg_sqs,
+    weight_decay=0,
+    beta1=0.9,
+    beta2=0.999,
+    lr=1e-3,
+    eps=1e-8,
+):
+    """Run one Adam step, updating ``steps``, ``params``, ``exp_avgs`` and ``exp_avg_sqs`` in place."""
+    with torch.no_grad():
+        grads = [param.grad for param in params]
+        # update step
+        updated_steps = foreach_map(lambda x: x + 1, steps)
+        torch._foreach_copy_(steps, updated_steps)
+
+        if weight_decay != 0:
+            # foreach_map returns new tensors, so rebind grads to grad + weight_decay * param
+            grads = foreach_map(lambda g, p, wd: g.add(p, alpha=wd), grads, params, weight_decay)
+
+        # Higher-order operators (HOPs) cannot have multiple outputs at the moment,
+        # so we need to call foreach_map once for each output
+        exp_avgs_updated = foreach_map(torch.lerp, exp_avgs, grads, 1 - beta1)
+        exp_avgs_sq_updated = foreach_map(update_exp_avg_sq, exp_avg_sqs, grads, beta2)
+        params_updated = foreach_map(
+            update_param,
+            params,
+            steps,
+            exp_avgs_updated,
+            exp_avgs_sq_updated,
+            beta1,
+            beta2,
+            lr,
+            eps,
+        )
+        # HOPs don't support input mutation today, so manually update the states in-place
+        torch._foreach_copy_(exp_avgs, exp_avgs_updated)
+        torch._foreach_copy_(exp_avg_sqs, exp_avgs_sq_updated)
+        torch._foreach_copy_(params, params_updated)
+
+#####################################################################
+# Setting up and running the compiled kernel
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In this section, we'll run our Adam optimizer 
+# and compare the results
+#
+# .. note::
+#
+#    ``torch.compile`` is only supported on CUDA devices that have a compute capability of 7.0 or higher.
+opt_eager = torch.optim.Adam(model.parameters(), lr=torch.tensor(0.01))
+opt_eager_copy = torch.optim.Adam(model_copy.parameters(), lr=torch.tensor(0.01))
+
+# warm up the optimizer state dict
+opt_eager.step()
+opt_eager_copy.step()
+
+inputs = get_inputs(opt_eager_copy)
+compiled_adam = torch.compile(foreach_map_adam)
+
+# optionally view the output code
+torch._logging.set_logs(output_code=True)
+
+# Warmup runs to compile the function
+for _ in range(5):
+    opt_eager.step()
+    compiled_adam(*inputs)
+
+for eager_p, compile_p in zip(opt_eager.param_groups[0]["params"], opt_eager_copy.param_groups[0]["params"]):
+    torch.allclose(eager_p, compile_p)
+
+# Benchmark performance
+
+ # Let's define a helpful benchmarking function:
+import torch.utils.benchmark as benchmark
+
+def benchmark_torch_function_in_microseconds(f, *args, **kwargs):
+    t0 = benchmark.Timer(
+        stmt="f(*args, **kwargs)", globals={"args": args, "kwargs": kwargs, "f": f}
+    )
+    return t0.blocked_autorange().mean * 1e6
+
+eager_runtime = benchmark_torch_function_in_microseconds(opt_eager.step)
+compiled_runtime = benchmark_torch_function_in_microseconds(lambda: compiled_adam(*inputs))
+
+assert eager_runtime > compiled_runtime
+   
+print(f"eager runtime: {eager_runtime}us")
+print(f"compiled runtime: {compiled_runtime}us")
+
+
+
+######################################################################
+# Conclusion
+# ~~~~~~~~~~
+# In this tutorial, we successfully implemented a custom fully-fused Adam optimizer using foreach_map. 
+# By leveraging the power of foreach_map and torch.compile, we were able to create an optimized version of the Adam 
+# optimizer that can be used in various machine learning applications. This tutorial provides a comprehensive guide 
+# on how to use foreach_map and torch.compile to optimize machine learning models, and serves as a 
+# valuable resource for developers looking to improve the performance of their models with horizontal fusion.
+#
+# See also:
+#
+# * `Compiled optimizer tutorial <https://pytorch.org/tutorials/recipes/compiling_optimizer.html>`__ - an intro into the compiled optimizer.
+# * `Compiling the optimizer with PT2 <https://dev-discuss.pytorch.org/t/compiling-the-optimizer-with-pt2/1669>`__ - deeper technical details on the compiled optimizer.
diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst
index f136c4b9c6..1ad3b0d97a 100644
--- a/recipes_source/recipes_index.rst
+++ b/recipes_source/recipes_index.rst
@@ -335,6 +335,15 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu
    :link: ../recipes/compiling_optimizer_lr_scheduler.html
    :tags: Model-Optimization
 
+.. (beta) Explicit horizontal fusion with foreach_map and torch.compile
+
+.. customcarditem::
+   :header: (beta) Explicit horizontal fusion with foreach_map and torch.compile
+   :card_description: Horizontally fuse pointwise ops with torch.compile
+   :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png
+   :link: ../recipes/foreach_map.html
+   :tags: Model-Optimization
+
 .. Using User-Defined Triton Kernels with ``torch.compile``
 
 .. customcarditem::