From 9f5960ccc984a8ba59eeba58fc6ccfb89a9ee8cb Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Wed, 8 Jan 2025 00:12:49 +0000
Subject: [PATCH 1/5] Add fallback to arange and argmax variations for falcon-7b

---
 torch_ttnn/passes/lowering/to_tt_guard.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/torch_ttnn/passes/lowering/to_tt_guard.py b/torch_ttnn/passes/lowering/to_tt_guard.py
index bfe128809..fb6b39c14 100644
--- a/torch_ttnn/passes/lowering/to_tt_guard.py
+++ b/torch_ttnn/passes/lowering/to_tt_guard.py
@@ -83,6 +83,21 @@
     ["List[Tensor] tensors = [<[13600]>, <[13600]>, <[13600]>, <[13600]>]", "int dim = 1"],
 ]

+############################################################
+# EXTRA BLOCKLIST OF falcon-7b-instruct
+############################################################
+# Statically allocated circular buffers on core range [(x=0,y=0) - (x=0,y=0)] grow to 3580704 B which is beyond max L1 size of 1499136 B
+aten_arange_start_step = [
+    [
+        "number start = 7",
+        "number end = 0",
+        "number step = -1",
+        "Optional[Device] device = cpu",
+        "Optional[bool] pin_memory = False",
+    ]
+]
+aten_argmax_default = [["Tensor<[1, 7]> self = ?", "Optional[int] dim = 1", "bool keepdim = True"]]
+
 ############################################################
 # EXTRA BLOCKLIST OF retinanet_resnet50_fpn_v2
 ############################################################
@@ -94,6 +109,8 @@
 GUARD[torch.ops.aten.gt.Scalar] = partial(guard_aten, aten_gt_Scalar_blocklist)
 GUARD[torch.ops.aten.cumsum.default] = partial(guard_aten, aten_cumsum_default_blocklist)
 GUARD[torch.ops.aten.stack.default] = partial(guard_aten, aten_aten_stack_default)
+GUARD[torch.ops.aten.arange.start_step] = partial(guard_aten, aten_arange_start_step)
+GUARD[torch.ops.aten.argmax.default] = partial(guard_aten, aten_argmax_default)


 def can_lowering_to_ttnn(node):

From a0a66bc71c215829ba09da49b5f05b084e880ce2 Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Wed, 8 Jan 2025 18:27:19 +0000
Subject: [PATCH 2/5] Append argmax blocklist instead since it already exists

---
 torch_ttnn/passes/lowering/to_tt_guard.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/torch_ttnn/passes/lowering/to_tt_guard.py b/torch_ttnn/passes/lowering/to_tt_guard.py
index fb6b39c14..1a58e46fd 100644
--- a/torch_ttnn/passes/lowering/to_tt_guard.py
+++ b/torch_ttnn/passes/lowering/to_tt_guard.py
@@ -96,7 +96,7 @@
         "Optional[bool] pin_memory = False",
     ]
 ]
-aten_argmax_default = [["Tensor<[1, 7]> self = ?", "Optional[int] dim = 1", "bool keepdim = True"]]
+aten_argmax_default_blocklist += [["Tensor<[1, 7]> self = ?", "Optional[int] dim = 1", "bool keepdim = True"]]

 ############################################################
 # EXTRA BLOCKLIST OF retinanet_resnet50_fpn_v2
@@ -110,7 +110,6 @@ GUARD[torch.ops.aten.gt.Scalar] = partial(guard_aten, aten_gt_Scalar_blocklist)
 GUARD[torch.ops.aten.cumsum.default] = partial(guard_aten, aten_cumsum_default_blocklist)
 GUARD[torch.ops.aten.stack.default] = partial(guard_aten, aten_aten_stack_default)
 GUARD[torch.ops.aten.arange.start_step] = partial(guard_aten, aten_arange_start_step)
-GUARD[torch.ops.aten.argmax.default] = partial(guard_aten, aten_argmax_default)


 def can_lowering_to_ttnn(node):
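
For context on what these guard-table edits do: the pattern in to_tt_guard.py registers, per aten op overload, a callable with the blocklist baked in via functools.partial. The sketch below is a minimal illustration of that pattern, assuming guard_aten simply rejects argument signatures found in the blocklist; the string table key and the function body are illustrative stand-ins, not the repo's actual implementation.

from functools import partial

GUARD = {}

def guard_aten(blocklist, arg_summary):
    # Reject (return False) when the node's formatted arguments match a
    # blocklisted signature; True means lowering to ttnn may proceed.
    return arg_summary not in blocklist

aten_argmax_default_blocklist = [
    ["Tensor<[1, 7]> self = ?", "Optional[int] dim = 1", "bool keepdim = True"],
]

# partial() bakes the blocklist in, so each table entry is a one-argument guard.
GUARD["aten.argmax.default"] = partial(guard_aten, aten_argmax_default_blocklist)

# The falcon-7b argmax signature from the patch is blocked:
assert not GUARD["aten.argmax.default"](
    ["Tensor<[1, 7]> self = ?", "Optional[int] dim = 1", "bool keepdim = True"]
)
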
From 4c71b0f0d9fba2ea2cc09eba88a57eb7cb636e64 Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Wed, 8 Jan 2025 22:52:39 +0000
Subject: [PATCH 3/5] Add arange.start_step to constant folding pass instead

---
 torch_ttnn/passes/constant_folding_pass.py |  1 +
 torch_ttnn/passes/lowering/to_tt_guard.py  | 10 ----------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/torch_ttnn/passes/constant_folding_pass.py b/torch_ttnn/passes/constant_folding_pass.py
index dbd8de010..0b62d404f 100644
--- a/torch_ttnn/passes/constant_folding_pass.py
+++ b/torch_ttnn/passes/constant_folding_pass.py
@@ -11,6 +11,7 @@ def __init__(self):
         torch.ops.aten.lift_fresh_copy.default,
         torch.ops.aten.pow.Tensor_Tensor,
         torch.ops.aten.arange.start,
+        torch.ops.aten.arange.start_step,
         torch.ops.aten.unsqueeze.default,
         torch.ops.aten.arange.default,
         torch.ops.aten.view.default,
diff --git a/torch_ttnn/passes/lowering/to_tt_guard.py b/torch_ttnn/passes/lowering/to_tt_guard.py
index 1a58e46fd..bc36191f0 100644
--- a/torch_ttnn/passes/lowering/to_tt_guard.py
+++ b/torch_ttnn/passes/lowering/to_tt_guard.py
@@ -87,15 +87,6 @@
 # EXTRA BLOCKLIST OF falcon-7b-instruct
 ############################################################
 # Statically allocated circular buffers on core range [(x=0,y=0) - (x=0,y=0)] grow to 3580704 B which is beyond max L1 size of 1499136 B
-aten_arange_start_step = [
-    [
-        "number start = 7",
-        "number end = 0",
-        "number step = -1",
-        "Optional[Device] device = cpu",
-        "Optional[bool] pin_memory = False",
-    ]
-]
 aten_argmax_default_blocklist += [["Tensor<[1, 7]> self = ?", "Optional[int] dim = 1", "bool keepdim = True"]]

 ############################################################
@@ -109,7 +100,6 @@
 GUARD[torch.ops.aten.gt.Scalar] = partial(guard_aten, aten_gt_Scalar_blocklist)
 GUARD[torch.ops.aten.cumsum.default] = partial(guard_aten, aten_cumsum_default_blocklist)
 GUARD[torch.ops.aten.stack.default] = partial(guard_aten, aten_aten_stack_default)
-GUARD[torch.ops.aten.arange.start_step] = partial(guard_aten, aten_arange_start_step)


 def can_lowering_to_ttnn(node):

From 3e2484635bfa35f53db7ecd40bfc1a3f585a6719 Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Thu, 9 Jan 2025 16:45:24 +0000
Subject: [PATCH 4/5] Fix arange test

---
 tests/lowering/creation/test_arange.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/lowering/creation/test_arange.py b/tests/lowering/creation/test_arange.py
index be863f311..fa81cb1dd 100644
--- a/tests/lowering/creation/test_arange.py
+++ b/tests/lowering/creation/test_arange.py
@@ -90,6 +90,6 @@ def test_arange_start_step(device, input_shapes):

     # Check the graph has be rewritten and contain ttnn ops
     nodes = list(option._out_fx_graphs[0].nodes)
-    assert [node.target for node in nodes].count(ttnn.arange) == 1
+    assert [node.target for node in nodes].count(ttnn.arange) == 1 or [node.op for node in nodes].count("get_attr")
     # Check inference result
     assert torch.allclose(result_before, result_after)
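
Why PATCH 3/5 works: aten.arange.start_step takes only scalar arguments, so its output is a compile-time constant that never needs to reach the device, sidestepping the L1 circular-buffer limit entirely. A hedged sketch of the idea follows; the real pass rewrites FX graph nodes, and FOLDABLE/maybe_fold here are hypothetical stand-ins.

import torch

# Hypothetical stand-ins for the pass's fold set and rewrite step.
FOLDABLE = {torch.ops.aten.arange.start_step}

def maybe_fold(op, *args, **kwargs):
    # An op with no Tensor inputs can be evaluated once at compile time and
    # embedded in the graph as a constant instead of being lowered to a kernel.
    if op in FOLDABLE and not any(isinstance(a, torch.Tensor) for a in args):
        return op(*args, **kwargs)
    return None  # leave the node in the graph for normal lowering

# The falcon-7b shape from the removed blocklist entry folds cleanly:
print(maybe_fold(torch.ops.aten.arange.start_step, 7, 0, -1))
# tensor([7, 6, 5, 4, 3, 2, 1])
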
From cb127a227c2aaa9e3096d11c003e0bf5324f497e Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Wed, 22 Jan 2025 18:02:44 +0000
Subject: [PATCH 5/5] Correct error message

---
 torch_ttnn/passes/lowering/to_tt_guard.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torch_ttnn/passes/lowering/to_tt_guard.py b/torch_ttnn/passes/lowering/to_tt_guard.py
index bc36191f0..a8ac05350 100644
--- a/torch_ttnn/passes/lowering/to_tt_guard.py
+++ b/torch_ttnn/passes/lowering/to_tt_guard.py
@@ -86,7 +86,8 @@
 ############################################################
 # EXTRA BLOCKLIST OF falcon-7b-instruct
 ############################################################
-# Statically allocated circular buffers on core range [(x=0,y=0) - (x=0,y=0)] grow to 3580704 B which is beyond max L1 size of 1499136 B
+# RuntimeError: TT_THROW @ /tmp/build-via-sdist-d26xvola/ttnn-0.54.0rc18+wormhole.b0/ttnn/cpp/ttnn/device_operation.hpp:487: tt::exception
+# Unsupported storage type
 aten_argmax_default_blocklist += [["Tensor<[1, 7]> self = ?", "Optional[int] dim = 1", "bool keepdim = True"]]

 ############################################################
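
The loosened assertion in PATCH 4/5 reflects the same change: after constant folding, the traced graph may carry the precomputed range as a get_attr constant rather than a ttnn.arange call node. Below is a toy FX example of the folded shape; the module is illustrative, not the test's actual model.

import torch
from torch.fx import symbolic_trace

class FoldedArange(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Precomputed at construction time, analogous to a folded arange.
        self.register_buffer("folded", torch.arange(7, 0, -1))

    def forward(self, x):
        return x + self.folded

gm = symbolic_trace(FoldedArange())
# The constant surfaces as a get_attr node instead of an arange call,
# which is the second branch the fixed assertion accepts.
assert [node.op for node in gm.graph.nodes].count("get_attr") >= 1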