
Commit 237ca12

Make changes to work on BERT
1 parent d183cc8 commit 237ca12

File tree

7 files changed: 301 additions & 214 deletions


tests/cpp_extension/test_cpp_extension.py

Lines changed: 71 additions & 43 deletions
@@ -7,47 +7,64 @@
 import torch_ttnn
 from torch_ttnn.cpp_extension.custom_device_mode import ttnn_module, enable_ttnn_device
 import pytest
+import time
 
 from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 
 import logging
 import sys
 
+
 @pytest.mark.parametrize(
     "input_shape",
-    ((32, 1, 3, 3), (32,)),
+    ((32, 1, 3, 3), (1, 32)),
+)
+@pytest.mark.parametrize(
+    "dtype",
+    (torch.bfloat16, torch.int32),
 )
-def test_cpp_extension(device, input_shape):
-    torch.utils.rename_privateuse1_backend('ttnn')
+def test_cpp_extension(device, input_shape, dtype):
+    torch.utils.rename_privateuse1_backend("ttnn")
 
     # in pytest the device has already been initialized before this call
     # so instead we can wrap this around the custom device
     ttnn_device = ttnn_module.custom_device_from_ttnn(device)
 
     logging.info("Creating bfloat tensor from -1 to 1")
-    torch_tensor = torch.empty(input_shape, dtype = torch.bfloat16).uniform_(-1, 1)
+    if dtype == torch.bfloat16:
+        torch_tensor = torch.empty(input_shape, dtype=dtype).uniform_(-1, 1)
+    elif dtype == torch.int32:
+        torch_tensor = torch.randint(-1000, 1000, input_shape)
+        torch_tensor = torch_tensor.to(torch.int32)
+    else:
+        raise Exception(f"{dtype} not being tested at this time")
     print(torch_tensor)
-    torch_tensor_abs = torch.abs(torch_tensor)
-    print(torch_tensor_abs)
 
     logging.info("Transferring to ttnn")
     torch_ttnn_tensor = torch_tensor.to(ttnn_device)
 
-    logging.info("get underlying ttnn tensor")
+    logging.info("Get underlying ttnn tensor")
     ttnn_tensor = ttnn_module.get_ttnn_tensor(torch_ttnn_tensor)
 
-    logging.info("Running abs on ttnn")
-    ttnn_tensor = ttnn.abs(ttnn_tensor)
+    # Compare output of abs op for bfloat16 dtype since ttnn.abs does not support int
+    if dtype == torch.bfloat16:
+        torch_out = torch.abs(torch_tensor)
+        print(torch_out)
+
+        logging.info("Running abs on ttnn")
+        ttnn_tensor = ttnn.abs(ttnn_tensor)
+    elif dtype == torch.int32:
+        torch_out = torch_tensor
+    else:
+        raise Exception(f"{dtype} not being tested at this time")
 
     logging.info("calling to_torch")
     ttnn_to_torch = ttnn.to_torch(ttnn_tensor)
+
     print(ttnn_to_torch)
-
-
-    assert torch.allclose(torch_tensor_abs, ttnn_to_torch, rtol=0.1, atol=0.1)
 
-    # logging.info("Closing device")
-    # ttnn_module.close_custom_device(ttnn_device)
+    assert torch.allclose(torch_out, ttnn_to_torch, rtol=0.1, atol=0.1)
+
 
 def test_bert_with_cpp_extension(device):
     model_name = "phiyodr/bert-large-finetuned-squad2"
@@ -66,34 +83,45 @@ def test_bert_with_cpp_extension(device):
     )
 
     option = torch_ttnn.TorchTtnnOption(
-        device=device,
-        gen_graphviz=False,
-        run_mem_analysis=False,
-        metrics_path=model_name,
-        verbose=True,
-    )
+        device=device,
+        gen_graphviz=False,
+        run_mem_analysis=False,
+        metrics_path=model_name,
+        verbose=True,
+    )
 
     # custom device
-    torch.utils.rename_privateuse1_backend('ttnn')
+    torch.utils.rename_privateuse1_backend("ttnn")
     ttnn_device = ttnn_module.custom_device_from_ttnn(device)
-
+
     # clone input_ids on cpu since this the data transfer is somehow inplace?
    input_ids = inputs.input_ids.clone()
-
-    inputs = inputs.to(ttnn_device)
-    # modules are inplace, tensors are not
-    m.to(ttnn_device)
 
-    model = torch.compile(m, backend=torch_ttnn.backend, options=option)
-    outputs = model(**inputs)
-
     # Helper function to decode output to human-readable text
     def decode_output(outputs):
         response_start = torch.argmax(outputs.start_logits)
         response_end = torch.argmax(outputs.end_logits) + 1
         response_tokens = input_ids[0, response_start:response_end]
         return tokenizer.decode(response_tokens)
 
+    # comment out these to disable cpp extension
+    start_to = time.perf_counter() * 1000
+    inputs = inputs.to(ttnn_device)
+    # modules are inplace, tensors are not
+    m.to(ttnn_device)
+    end_to = time.perf_counter() * 1000
+    print(f"to: {end_to - start_to} (ms)")
+
+    model = torch.compile(m, backend=torch_ttnn.backend, options=option)
+
+    for idx in range(5):
+        start = time.perf_counter() * 1000
+        # Don't need to reset options if inputs don't change because of cache
+        outputs = model(**inputs)
+        end = time.perf_counter() * 1000
+        run_time = end - start
+        print(f"iter {idx}: {run_time} (ms)")
+
     print("finished:")
     print(outputs)
     answer = decode_output(outputs)
@@ -108,9 +136,10 @@ def decode_output(outputs):
     """
     )
 
+
 # adapted from https://github.com/pytorch/examples/blob/main/mnist/main.py
 class MnistModel(torch.nn.Module):
-    def __init__(self):
+    def __init__(self):
         super(MnistModel, self).__init__()
         self.conv1 = nn.Conv2d(1, 32, 3, 1)
         self.conv2 = nn.Conv2d(32, 64, 3, 1)
@@ -133,8 +162,9 @@ def forward(self, x):
         x = self.fc2(x)
         x = F.log_softmax(x, dim=1)
         return x
-
-def test_mnist_with_cpp_extension(device):
+
+@pytest.mark.skip(reason="Does not support conv for now")
+def test_mnist_with_cpp_extension(device):
     model_name = "Mnist"
     transform = transforms.Compose([transforms.ToTensor()])
     test_dataset = datasets.MNIST(root="./data", train=False, transform=transform, download=True)
@@ -143,23 +173,21 @@ def test_mnist_with_cpp_extension(device):
     test_input = test_input.to(torch.bfloat16)
 
     # Copy weights and biases to ttnn
-    torch.utils.rename_privateuse1_backend('ttnn')
+    torch.utils.rename_privateuse1_backend("ttnn")
     ttnn_device = ttnn_module.custom_device_from_ttnn(device)
-
 
-
     option = torch_ttnn.TorchTtnnOption(
-        device=device,
-        gen_graphviz=False,
-        run_mem_analysis=False,
-        metrics_path=model_name,
-        verbose=True,
-    )
+        device=device,
+        gen_graphviz=False,
+        run_mem_analysis=False,
+        metrics_path=model_name,
+        verbose=True,
+    )
 
     model = MnistModel()
     model = model.to(torch.bfloat16)
     test_input = test_input.to(ttnn_device)
     model.to(ttnn_device)
-
+
     model = torch.compile(model, backend=torch_ttnn.backend, options=option)
-    results = model(test_input)
+    results = model(test_input)
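
For readers skimming the diff: the round-trip this test now exercises, condensed into a standalone sketch. The `device` handle comes from the suite's pytest fixture, and `roundtrip_abs` is an illustrative name, not code from this commit.

import torch
import ttnn
from torch_ttnn.cpp_extension.custom_device_mode import ttnn_module

def roundtrip_abs(device, input_shape=(1, 32)):
    # Register the PrivateUse1 backend under the name "ttnn" and wrap
    # the already-open ttnn device (pytest opens it via a fixture).
    torch.utils.rename_privateuse1_backend("ttnn")
    ttnn_device = ttnn_module.custom_device_from_ttnn(device)

    cpu = torch.empty(input_shape, dtype=torch.bfloat16).uniform_(-1, 1)
    on_device = cpu.to(ttnn_device)              # torch -> ttnn transfer
    tt = ttnn_module.get_ttnn_tensor(on_device)  # unwrap the underlying ttnn.Tensor
    tt = ttnn.abs(tt)                            # int32 inputs skip this op in the test
    back = ttnn.to_torch(tt)                     # ttnn -> torch transfer
    assert torch.allclose(torch.abs(cpu), back, rtol=0.1, atol=0.1)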

tests/utils.py

Lines changed: 4 additions & 0 deletions
@@ -3,6 +3,7 @@
 import collections
 import re
 from typing import List, Dict, Tuple
+from torch_ttnn.cpp_extension.custom_device_mode import ttnn_module, enable_ttnn_device
 
 
 class ModelTester:
@@ -130,6 +131,9 @@ def test_model_eval(self, as_ttnn=False, option=None):
         model = self.set_model_eval(self.model)
         inputs = self.set_inputs_eval(self.inputs)
         if as_ttnn == True:
+            torch.utils.rename_privateuse1_backend("ttnn")
+            ttnn_device = ttnn_module.custom_device_from_ttnn(option.device)
+            inputs = inputs.to(ttnn_device)
             model = self.compile_model(model, option)
         outputs = self.run_model(model, inputs)
         results = self.get_results_eval(model, inputs, outputs)
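
The three added lines are the only ttnn-specific work in the eval path: rename the PrivateUse1 backend, wrap the option's already-open device, and move the inputs onto it. A hypothetical call site might look like the sketch below; `SomeModelTester` and its construction are assumptions, not code from this commit.

option = torch_ttnn.TorchTtnnOption(device=device, metrics_path="demo")
tester = SomeModelTester()  # hypothetical ModelTester subclass
results = tester.test_model_eval(as_ttnn=True, option=option)
# With as_ttnn=True, inputs land on the wrapped ttnn device before
# compile_model()/run_model(), exercising the cpp-extension transfer path.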

torch_ttnn/cpp_extension/TtnnOpaqueTensorImpl.h renamed to torch_ttnn/cpp_extension/TtnnTensorImpl.hpp

Lines changed: 4 additions & 47 deletions
@@ -1,23 +1,14 @@
 #pragma once
 
-#include <ATen/OpaqueTensorImpl.h>
 #include "ttnn/tensor/tensor.hpp"
+#include "extension_utils.hpp"
 #include <iostream>
 #include <string.h>
 
-template <typename Arg, typename... Args>
-void doPrint(std::ostream& out, const std::string_view& filename, int lineno, const std::string_view& fn, Arg&& arg, Args&&... args)
-{
-  out << std::format("{}({})({}): ", filename, lineno, fn);
-  out << std::forward<Arg>(arg);
-  ((out << std::forward<Args>(args)), ...);
-  out << std::endl;
-}
-#define LOGGING(...) doPrint(std::cout, __FILE_NAME__, __LINE__, __FUNCTION__, __VA_ARGS__)
-
 namespace at {
 
 struct TtnnTensorImpl : public TensorImpl {
+  // TODO: Only difference is the storage type, combine these two
   TtnnTensorImpl(
       at::DispatchKeySet key_set,
       const caffe2::TypeMeta data_type,
@@ -55,8 +46,7 @@ struct TtnnTensorImpl : public TensorImpl {
   }
 
   ttnn::Tensor get_ttnn_tensor() {
-    // LOGGING(ttnn_tensor_string_);
-    LOGGING(ttnn_tensor_.write_to_string());
+    LOGGING("");
     return ttnn_tensor_;
   }
 
@@ -129,42 +119,9 @@ void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) override {
         /*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
     refresh_numel();
   }
-
-  // protected:
-  //   static void copy_tensor_metadata(
-  //       const TtnnTensorImpl* src_impl,
-  //       TtnnTensorImpl* dest_impl,
-  //       const c10::VariableVersion& version_counter,
-  //       bool allow_tensor_metadata_change) {
-  //     TensorImpl::copy_tensor_metadata(
-  //         src_impl,
-  //         dest_impl,
-  //         version_counter,
-  //         allow_tensor_metadata_change);
-
-  //     // TtnnTensorImpl-specific fields.
-  //     dest_impl->ttnn_tensor_ = src_impl->ttnn_tensor_;
-  //     dest_impl->ttnn_tensor_string_ = src_impl->ttnn_tensor_string_;
-  //   }
-
-  //   static void copy_tensor_metadata(
-  //       const TtnnTensorImpl* src_impl,
-  //       TtnnTensorImpl* dest_impl,
-  //       c10::VariableVersion&& version_counter,
-  //       bool allow_tensor_metadata_change) {
-  //     TensorImpl::copy_tensor_metadata(
-  //         src_impl,
-  //         dest_impl,
-  //         std::move(version_counter),
-  //         allow_tensor_metadata_change);
-
-  //     // TtnnTensorImpl-specific fields.
-  //     dest_impl->ttnn_tensor_ = src_impl->ttnn_tensor_;
-  //     dest_impl->ttnn_tensor_string_ = src_impl->ttnn_tensor_string_;
-  //   }
-
 private:
   ttnn::Tensor ttnn_tensor_;
+  // TODO: Debug only, should probably remove as it might be costly
   std::string ttnn_tensor_string_;
 };
torch_ttnn/cpp_extension/custom_device_mode.py

Lines changed: 11 additions & 9 deletions
@@ -6,7 +6,7 @@
 import glob
 import logging
 
-assert os.environ.get('TT_METAL_HOME') is not None
+assert os.environ.get("TT_METAL_HOME") is not None
 tt_metal_home = Path(os.environ["TT_METAL_HOME"])
 
 cpmcache_pattern = Path(".cpmcache/**/include")
@@ -34,7 +34,7 @@
     tt_metal_home / Path("tt_metal/api"),
     tt_metal_home / Path("tt_metal/tt_stl"),
     tt_metal_home / Path("tt_metal/include/tt_metal/internal"),
-] + cpmcache_dirs
+] + cpmcache_dirs
 ttnn_include_paths = [str(p) for p in ttnn_include_paths]
 
 # Load the C++ extension containing your custom kernels.
@@ -64,12 +64,13 @@
         str(working_directory / "open_registration_extension.cpp"),
     ],
     extra_include_paths=[str(working_directory)] + ttnn_include_paths,
-    extra_cflags=["-g", "-DFMT_HEADER_ONLY", '-std=c++20', '-stdlib=libc++'],
+    extra_cflags=["-g", "-DFMT_HEADER_ONLY", "-std=c++20", "-stdlib=libc++"],
     extra_ldflags=tt_metal_lib_paths + tt_metal_libs,
     verbose=True,
 )
 
-logging.info('Loaded custom extension.')
+logging.info("Loaded custom extension.")
+
 
 # The user will globally enable the below mode when calling this API
 def enable_ttnn_device():
@@ -78,6 +79,7 @@ def enable_ttnn_device():
     # If you want the mode to never be disabled, then this function shouldn't return anything.
     return m
 
+
 # This is a simple TorchFunctionMode class that:
 # (a) Intercepts all torch.* calls
 # (b) Checks for kwargs of the form `device="foo:i"`
@@ -87,14 +89,14 @@ class TtnnDeviceMode(TorchFunctionMode):
     def __torch_function__(self, func, types, args=(), kwargs=None):
         if kwargs is None:
             kwargs = {}
-        if 'device' in kwargs and 'ttnn' in kwargs['device']:
-            device_and_idx = kwargs['device'].split(':')
+        if "device" in kwargs and "ttnn" in kwargs["device"]:
+            device_and_idx = kwargs["device"].split(":")
             if len(device_and_idx) == 1:
                 # Case 1: No index specified
-                kwargs['device'] = ttnn_module.custom_device()
+                kwargs["device"] = ttnn_module.custom_device()
             else:
                 # Case 2: The user specified a device index.
                 device_idx = int(device_and_idx[1])
-                kwargs['device'] = ttnn_module.custom_device(device_idx)
+                kwargs["device"] = ttnn_module.custom_device(device_idx)
         with torch._C.DisableTorchFunction():
-            return func(*args, **kwargs)
+            return func(*args, **kwargs)
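
Taken together, enable_ttnn_device() and TtnnDeviceMode let plain torch.* factory calls target the extension by device string. A minimal usage sketch, assuming the extension compiles and a TT device is available:

import torch
from torch_ttnn.cpp_extension.custom_device_mode import enable_ttnn_device

mode = enable_ttnn_device()  # installs the TorchFunctionMode shown above

# "ttnn" with no index is rewritten to ttnn_module.custom_device();
# "ttnn:0" has its index parsed and becomes ttnn_module.custom_device(0).
x = torch.ones((2, 2), dtype=torch.bfloat16, device="ttnn")
y = torch.zeros((2, 2), dtype=torch.bfloat16, device="ttnn:0")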
