
Commit d183cc8

Integrate custom TTNN extension for PyTorch
1 parent f4b0287 commit d183cc8

File tree

5 files changed: +1017 −1 lines changed

Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import ttnn
import torch_ttnn
from torch_ttnn.cpp_extension.custom_device_mode import ttnn_module, enable_ttnn_device
import pytest

from transformers import AutoTokenizer, AutoModelForQuestionAnswering

import logging
import sys


@pytest.mark.parametrize(
    "input_shape",
    ((32, 1, 3, 3), (32,)),
)
def test_cpp_extension(device, input_shape):
    torch.utils.rename_privateuse1_backend('ttnn')

    # In pytest the device has already been initialized before this call,
    # so wrap the custom device around the existing one instead.
    ttnn_device = ttnn_module.custom_device_from_ttnn(device)

    logging.info("Creating bfloat16 tensor with values in [-1, 1]")
    torch_tensor = torch.empty(input_shape, dtype=torch.bfloat16).uniform_(-1, 1)
    print(torch_tensor)
    torch_tensor_abs = torch.abs(torch_tensor)
    print(torch_tensor_abs)

    logging.info("Transferring to ttnn")
    torch_ttnn_tensor = torch_tensor.to(ttnn_device)

    logging.info("Getting underlying ttnn tensor")
    ttnn_tensor = ttnn_module.get_ttnn_tensor(torch_ttnn_tensor)

    logging.info("Running abs on ttnn")
    ttnn_tensor = ttnn.abs(ttnn_tensor)

    logging.info("Calling to_torch")
    ttnn_to_torch = ttnn.to_torch(ttnn_tensor)
    print(ttnn_to_torch)

    assert torch.allclose(torch_tensor_abs, ttnn_to_torch, rtol=0.1, atol=0.1)

    # logging.info("Closing device")
    # ttnn_module.close_custom_device(ttnn_device)

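# The test above exercises the manual path: move a tensor to the custom
# device, unwrap it with get_ttnn_tensor, and run TTNN ops directly. The
# tests below instead route whole modules through torch.compile with
# torch_ttnn.backend.
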
def test_bert_with_cpp_extension(device):
    model_name = "phiyodr/bert-large-finetuned-squad2"
    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", torch_dtype=torch.bfloat16)
    m = AutoModelForQuestionAnswering.from_pretrained(model_name, torch_dtype=torch.bfloat16)
    context = 'Johann Joachim Winckelmann was a German art historian and archaeologist. He was a pioneering Hellenist who first articulated the difference between Greek, Greco-Roman and Roman art. "The prophet and founding hero of modern archaeology", Winckelmann was one of the founders of scientific archaeology and first applied the categories of style on a large, systematic basis to the history of art. '
    question = "What discipline did Winkelmann create?"
    inputs = tokenizer.encode_plus(
        question,
        context,
        add_special_tokens=True,
        return_tensors="pt",
        max_length=256,
        padding="max_length",
        truncation=True,
    )

    option = torch_ttnn.TorchTtnnOption(
        device=device,
        gen_graphviz=False,
        run_mem_analysis=False,
        metrics_path=model_name,
        verbose=True,
    )

    # custom device
    torch.utils.rename_privateuse1_backend('ttnn')
    ttnn_device = ttnn_module.custom_device_from_ttnn(device)

    # Clone input_ids on the CPU first, since the device transfer appears
    # to be in-place.
    input_ids = inputs.input_ids.clone()

    inputs = inputs.to(ttnn_device)
    # Module.to() is in-place; Tensor.to() is not.
    m.to(ttnn_device)

    model = torch.compile(m, backend=torch_ttnn.backend, options=option)
    outputs = model(**inputs)

    # Helper function to decode output to human-readable text
    def decode_output(outputs):
        response_start = torch.argmax(outputs.start_logits)
        response_end = torch.argmax(outputs.end_logits) + 1
        response_tokens = input_ids[0, response_start:response_end]
        return tokenizer.decode(response_tokens)

    print("finished:")
    print(outputs)
    answer = decode_output(outputs)

    print(
        f"""
    model_name: {model_name}
    input:
        context: {context}
        question: {question}
    answer: {answer}
    """
    )

# adapted from https://github.com/pytorch/examples/blob/main/mnist/main.py
class MnistModel(torch.nn.Module):
    def __init__(self):
        super(MnistModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x


def test_mnist_with_cpp_extension(device):
    model_name = "Mnist"
    transform = transforms.Compose([transforms.ToTensor()])
    test_dataset = datasets.MNIST(root="./data", train=False, transform=transform, download=True)
    dataloader = DataLoader(test_dataset, batch_size=1)
    test_input, _ = next(iter(dataloader))
    test_input = test_input.to(torch.bfloat16)

    # Copy weights and biases to ttnn
    torch.utils.rename_privateuse1_backend('ttnn')
    ttnn_device = ttnn_module.custom_device_from_ttnn(device)

    option = torch_ttnn.TorchTtnnOption(
        device=device,
        gen_graphviz=False,
        run_mem_analysis=False,
        metrics_path=model_name,
        verbose=True,
    )

    model = MnistModel()
    model = model.to(torch.bfloat16)
    test_input = test_input.to(ttnn_device)
    model.to(ttnn_device)

    model = torch.compile(model, backend=torch_ttnn.backend, options=option)
    results = model(test_input)
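
For readers trying the extension outside pytest, where no `device` fixture is available, here is a minimal standalone sketch of the same round trip (not part of this commit; it assumes `ttnn.open_device`/`ttnn.close_device` as the device entry points, with the rest mirroring `test_cpp_extension` above):

import torch
import ttnn
from torch_ttnn.cpp_extension.custom_device_mode import ttnn_module

torch.utils.rename_privateuse1_backend('ttnn')
device = ttnn.open_device(device_id=0)  # open a TTNN device directly
ttnn_device = ttnn_module.custom_device_from_ttnn(device)

x = torch.empty((32,), dtype=torch.bfloat16).uniform_(-1, 1)
x_ttnn = ttnn_module.get_ttnn_tensor(x.to(ttnn_device))  # unwrap to a ttnn.Tensor
y = ttnn.to_torch(ttnn.abs(x_ttnn))  # run abs in TTNN and copy back to torch

assert torch.allclose(torch.abs(x), y, rtol=0.1, atol=0.1)
ttnn.close_device(device)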
Lines changed: 171 additions & 0 deletions
@@ -0,0 +1,171 @@
#pragma once

#include <ATen/OpaqueTensorImpl.h>
#include "ttnn/tensor/tensor.hpp"

#include <format>       // std::format, used by doPrint below
#include <iostream>
#include <string>       // std::string member of TtnnTensorImpl
#include <string_view>
#include <vector>

template <typename Arg, typename... Args>
void doPrint(std::ostream& out, const std::string_view& filename, int lineno,
             const std::string_view& fn, Arg&& arg, Args&&... args) {
  out << std::format("{}({})({}): ", filename, lineno, fn);
  out << std::forward<Arg>(arg);
  ((out << std::forward<Args>(args)), ...);
  out << std::endl;
}
#define LOGGING(...) doPrint(std::cout, __FILE_NAME__, __LINE__, __FUNCTION__, __VA_ARGS__)

namespace at {

struct TtnnTensorImpl : public TensorImpl {
  TtnnTensorImpl(
      at::DispatchKeySet key_set,
      const caffe2::TypeMeta data_type,
      c10::Device device,
      ttnn::Tensor& ttnn_tensor,
      c10::intrusive_ptr<c10::StorageImpl> storage)
      : TensorImpl(key_set, data_type, device),
        ttnn_tensor_(ttnn_tensor),
        ttnn_tensor_string_(ttnn_tensor.write_to_string()) {
    storage_ = std::move(storage);
    // Mirror the TTNN logical shape into the ATen sizes.
    auto view = ttnn_tensor_.get_logical_shape().view();
    std::vector<int64_t> view_int64;
    std::copy(view.begin(), view.end(), std::back_inserter(view_int64));
    sizes_and_strides_.set_sizes(IntArrayRef(view_int64.data(), view_int64.size()));
  }

  TtnnTensorImpl(
      at::DispatchKeySet key_set,
      const caffe2::TypeMeta data_type,
      c10::Device device,
      const ttnn::Tensor& ttnn_tensor,
      const Storage& storage)
      : TensorImpl(key_set, data_type, device),
        ttnn_tensor_(ttnn_tensor),
        ttnn_tensor_string_(ttnn_tensor.write_to_string()) {
    storage_ = storage;
    // Mirror the TTNN logical shape into the ATen sizes.
    auto view = ttnn_tensor_.get_logical_shape().view();
    std::vector<int64_t> view_int64;
    std::copy(view.begin(), view.end(), std::back_inserter(view_int64));
    sizes_and_strides_.set_sizes(IntArrayRef(view_int64.data(), view_int64.size()));
  }

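  // Note: the ATen-side sizes set in the constructors are a snapshot;
  // set_ttnn_tensor() below does not refresh them, so callers swapping in a
  // tensor of a different shape are expected to also call
  // set_sizes_and_strides() (or set_sizes_and_strides_as()).
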
  void set_sizes_and_strides(const IntArrayRef& int_array_ref) {
    sizes_and_strides_.set_sizes(int_array_ref);
  }

  void set_sizes_and_strides_as(const at::Tensor& the_template) {
    sizes_and_strides_.set_sizes(the_template.sizes());
  }

  ttnn::Tensor get_ttnn_tensor() {
    // LOGGING(ttnn_tensor_string_);
    LOGGING(ttnn_tensor_.write_to_string());
    return ttnn_tensor_;
  }

  void set_ttnn_tensor(const ttnn::Tensor& tensor) {
    ttnn_tensor_ = tensor;
  }

  /**
   * Return a TensorImpl that is a shallow-copy of this TensorImpl.
   *
   * For usage of `version_counter` and `allow_tensor_metadata_change`,
   * see NOTE [ TensorImpl Shallow-Copying ].
   */
  c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
      const c10::VariableVersion& version_counter,
      bool allow_tensor_metadata_change) const override {
    auto impl = c10::make_intrusive<TtnnTensorImpl>(
        key_set(),
        dtype(),
        device(),
        ttnn_tensor_,
        storage_);
    copy_tensor_metadata(
        /*src_impl=*/this,
        /*dest_impl=*/impl.get(),
        /*version_counter=*/version_counter,
        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
    impl->refresh_numel();
    return impl;
  }

  /**
   * Return a TensorImpl that is a shallow-copy of this TensorImpl.
   *
   * For usage of `version_counter` and `allow_tensor_metadata_change`,
   * see NOTE [ TensorImpl Shallow-Copying ].
   */
  c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
      c10::VariableVersion&& version_counter,
      bool allow_tensor_metadata_change) const override {
    auto impl = c10::make_intrusive<TtnnTensorImpl>(
        key_set(),
        dtype(),
        device(),
        ttnn_tensor_,
        storage_);
    copy_tensor_metadata(
        /*src_impl=*/this,
        /*dest_impl=*/impl.get(),
        /*version_counter=*/std::move(version_counter),
        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
    impl->refresh_numel();
    return impl;
  }

  /**
   * Shallow-copies data from another TensorImpl into this TensorImpl.
   *
   * For why this function doesn't check this TensorImpl's
   * `allow_tensor_metadata_change_`, see NOTE [ TensorImpl Shallow-Copying ].
   */
  void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) override {
    AT_ASSERT(has_compatible_shallow_copy_type(impl->key_set()));
    auto ttnn_impl = static_cast<const TtnnTensorImpl*>(impl.get());
    copy_tensor_metadata(
        /*src_impl=*/ttnn_impl,
        /*dest_impl=*/this,
        /*version_counter=*/version_counter(),
        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
    refresh_numel();
  }

  // protected:
  //  static void copy_tensor_metadata(
  //      const TtnnTensorImpl* src_impl,
  //      TtnnTensorImpl* dest_impl,
  //      const c10::VariableVersion& version_counter,
  //      bool allow_tensor_metadata_change) {
  //    TensorImpl::copy_tensor_metadata(
  //        src_impl,
  //        dest_impl,
  //        version_counter,
  //        allow_tensor_metadata_change);
  //
  //    // TtnnTensorImpl-specific fields.
  //    dest_impl->ttnn_tensor_ = src_impl->ttnn_tensor_;
  //    dest_impl->ttnn_tensor_string_ = src_impl->ttnn_tensor_string_;
  //  }
  //
  //  static void copy_tensor_metadata(
  //      const TtnnTensorImpl* src_impl,
  //      TtnnTensorImpl* dest_impl,
  //      c10::VariableVersion&& version_counter,
  //      bool allow_tensor_metadata_change) {
  //    TensorImpl::copy_tensor_metadata(
  //        src_impl,
  //        dest_impl,
  //        std::move(version_counter),
  //        allow_tensor_metadata_change);
  //
  //    // TtnnTensorImpl-specific fields.
  //    dest_impl->ttnn_tensor_ = src_impl->ttnn_tensor_;
  //    dest_impl->ttnn_tensor_string_ = src_impl->ttnn_tensor_string_;
  //  }

 private:
  ttnn::Tensor ttnn_tensor_;
  std::string ttnn_tensor_string_;
};

} // namespace at
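
For orientation, a short Python-side sketch of what this TensorImpl provides (a hedged illustration, not part of this commit; `device` and `ttnn_device` are assumed to be set up as in the tests above):

import torch
import ttnn
from torch_ttnn.cpp_extension.custom_device_mode import ttnn_module

# The constructors above mirror ttnn_tensor.get_logical_shape() into the
# ATen sizes, so shape queries need no device round trip.
t = torch.rand((32, 1, 3, 3), dtype=torch.bfloat16).to(ttnn_device)
assert t.shape == (32, 1, 3, 3)

# ttnn_module.get_ttnn_tensor surfaces TtnnTensorImpl::get_ttnn_tensor,
# handing back the wrapped ttnn.Tensor for direct TTNN calls.
inner = ttnn_module.get_ttnn_tensor(t)
out = ttnn.to_torch(ttnn.abs(inner))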
