
Commit 54e45e3

Generate metrics data with pydantic models (#51)
* Generate aten op conversion status tables for each model
* Generate data with pydantic models
* Move pydantic model schema to a separate file
1 parent b88b88d commit 54e45e3

16 files changed: +580 -300 lines changed


README.md

Lines changed: 90 additions & 14 deletions
@@ -8,17 +8,17 @@ This project allows to run PyTorch code on [Tenstorrent](https://tenstorrent.com

The table below summarizes the results of running various ML models through our TTNN compiler. For each model, we track whether the run was successful, the number of operations before and after conversion, the number of `to_device` and `from_device` operations, performance metrics, and accuracy.

- | Model | Run Success | Torch Ops Before (Unique Ops) | Torch Ops Remain (Unique Ops) | To/From Device Ops | Original Run Time (s) | Compiled Run Time(s) | Accuracy |
- |:---|:---|:---|:---|:---|---:|:---|:---|
- | [Mnist (Eval)](tests/models/mnist) | | 14 (8) | 5 (4) | 12 | 0.01 | N/A | N/A |
- | [Mnist (Train)](tests/models/mnist) | | 14 (8) | 7 (5) | 14 | 0.01 | 2.52 | 0.64 |
- | [ResNet18](tests/models/resnet) | | 70 (9) | 42 (4) | 42 | 1.78 | 9.46 | 1.0 |
- | [Bloom](tests/models/bloom) | | N/A | N/A | N/A | 5.58 | N/A | N/A |
- | [YOLOS](tests/models/yolos) | | N/A | N/A | N/A | 0.18 | N/A | N/A |
- | [Llama](tests/models/llama) | | 3 (3) | 1 (1) | 5 | 38.21 | N/A | N/A |
- | [BERT](tests/models/bert) | | 1393 (21) | 489 (4) | 1340 | 62 | 36.17 | 0.99 |
- | [Falcon](tests/models/falcon) | | 3 (3) | 1 (1) | 5 | 34.81 | N/A | N/A |
- | [GPT-2](tests/models/gpt2) | | N/A | N/A | N/A | 1.04 | N/A | N/A |
+ | Model | Run Success | Torch Ops Before (Unique Ops) | Torch Ops Remain (Unique Ops) | To/From Device Ops | Original Run Time (ms) | Compiled Run Time (ms) | Accuracy (%) |
+ |:---|:---|:---|:---|:---|---:|:---|:---|
+ | [Mnist (Eval)](tests/models/mnist) | | 14 (8) | 5 (4) | 12 | 11.04 | N/A | N/A |
+ | [Mnist (Train)](tests/models/mnist) | | 14 (8) | 7 (5) | 14 | 18.01 | 2922.51 | 85.88 |
+ | [ResNet18](tests/models/resnet) | | 70 (9) | 42 (4) | 45 | 1772.4 | 8398.87 | 99.99 |
+ | [Bloom](tests/models/bloom) | | 1407 (29) | N/A | N/A | 5602.6 | N/A | N/A |
+ | [YOLOS](tests/models/yolos) | | 964 (28) | N/A | N/A | 209.04 | N/A | N/A |
+ | [Llama](tests/models/llama) | | 3 (3) | 1 (1) | 5 | 38255.4 | N/A | N/A |
+ | [BERT](tests/models/bert) | | 1393 (21) | 537 (4) | 1388 | 61919.4 | 52814.88 | 98.64 |
+ | [Falcon](tests/models/falcon) | | 3 (3) | 1 (1) | 5 | 35014.3 | N/A | N/A |
+ | [GPT-2](tests/models/gpt2) | | 748 (31) | N/A | N/A | 1033.47 | N/A | N/A |

### Explanation of Metrics

@@ -27,12 +27,88 @@ The table below summarizes the results of running various ML models through our
**Torch Ops Before (Unique Ops)**: The total number of operations used by the model in the original Torch implementation. The number in parenthesis represents the total unique ops.
**Torch Ops Remain (Unique Ops)**: The total number of operations used after conversion to TTNN. The number in parenthesis represents the total unique ops.
**To/From Device Ops**: The number of `to/from_device` operations (data transfer to/from the device).
- **Original Run Time (s)**: Execution time (in seconds) of the model before conversion.
- **Compiled Run Time(s)**: Execution time (in seconds) of the model after conversion.
- **Accuracy**: Model accuracy on a predefined test dataset after conversion.
+ **Original Run Time (ms)**: Execution time (in milliseconds) of the model before conversion.
+ **Compiled Run Time (ms)**: Execution time (in milliseconds) of the model after conversion.
+ **Accuracy (%)**: Model accuracy on a predefined test dataset after conversion.
***
**NOTE:** The total number of ops currently reflect only the first graph of a model. This will be fixed in a future update to include all graphs.

+ ***
+
+ ### Op conversion status per model
+
+ #### Mnist (Eval)
+ | aten ops | status | count |
+ |:---|:---|---:|
+ | aten._log_softmax.default | | 1 |
+ | aten.addmm.default | | 2 |
+ | aten.clone.default | | 2 |
+ | aten.convolution.default | | 2 |
+ | aten.max_pool2d_with_indices.default | | 1 |
+ | aten.relu.default | | 3 |
+ | aten.t.default | | 2 |
+ | aten.view.default | | 1 |
+ #### Mnist (Train)
+ | aten ops | status | count |
+ |:---|:---|---:|
+ | aten._log_softmax.default | | 1 |
+ | aten.addmm.default | | 2 |
+ | aten.convolution.default | | 2 |
+ | aten.max_pool2d_with_indices.default | | 1 |
+ | aten.native_dropout.default | | 2 |
+ | aten.relu.default | | 3 |
+ | aten.t.default | | 2 |
+ | aten.view.default | | 1 |
+ #### ResNet18
+ | aten ops | status | count |
+ |:---|:---|---:|
+ | aten._native_batch_norm_legit_no_training.default | | 20 |
+ | aten.add.Tensor | | 8 |
+ | aten.addmm.default | | 1 |
+ | aten.convolution.default | | 20 |
+ | aten.max_pool2d_with_indices.default | | 1 |
+ | aten.mean.dim | | 1 |
+ | aten.relu.default | | 17 |
+ | aten.t.default | | 1 |
+ | aten.view.default | | 1 |
+ #### Llama
+ | aten ops | status | count |
+ |:---|:---|---:|
+ | aten.arange.start | | 1 |
+ | aten.embedding.default | | 1 |
+ | aten.unsqueeze.default | | 1 |
+ #### BERT
+ | aten ops | status | count |
+ |:---|:---|---:|
+ | aten._softmax.default | | 24 |
+ | aten._to_copy.default | | 1 |
+ | aten.add.Tensor | | 74 |
+ | aten.addmm.default | | 145 |
+ | aten.bmm.default | | 48 |
+ | aten.clone.default | | 99 |
+ | aten.div.Tensor | | 24 |
+ | aten.embedding.default | | 3 |
+ | aten.expand.default | | 96 |
+ | aten.gelu.default | | 24 |
+ | aten.mul.Tensor | | 1 |
+ | aten.native_layer_norm.default | | 49 |
+ | aten.permute.default | | 96 |
+ | aten.rsub.Scalar | | 1 |
+ | aten.slice.Tensor | | 4 |
+ | aten.split.Tensor | | 1 |
+ | aten.squeeze.dim | | 2 |
+ | aten.t.default | | 145 |
+ | aten.transpose.int | | 24 |
+ | aten.unsqueeze.default | | 2 |
+ | aten.view.default | | 530 |
+ #### Falcon
+ | aten ops | status | count |
+ |:---|:---|---:|
+ | aten.arange.start | | 1 |
+ | aten.embedding.default | | 1 |
+ | aten.unsqueeze.default | | 1 |

## Quickstart

The `torch_ttnn` module has a `backend` function, which can be used with the `torch.compile()`.
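
As a minimal usage sketch of that quickstart pattern (the `m`, `inputs`, `device`, and `metrics_path` names are assumptions standing in for whatever the caller provides, mirroring the test changes further below):

```python
import torch
import torch_ttnn

# Sketch only: `m` (the model), `inputs`, `device`, and `metrics_path` are
# assumed to be provided by the surrounding code, as they are in the tests.
option = torch_ttnn.TorchTtnnOption(
    device=device, gen_graphviz=True, metrics_path=metrics_path
)
m = torch.compile(m, backend=torch_ttnn.backend, options=option)
outputs = m(inputs)  # run inference with the compiled model
```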

docs/README.md.in

Lines changed: 6 additions & 0 deletions
@@ -12,6 +12,12 @@ The table below summarizes the results of running various ML models through our

{explanations_md}

+ ***
+
+ ### Op conversion status per model
+
+ {aten_ops_md}
+
## Quickstart

The `torch_ttnn` module has a `backend` function, which can be used with the `torch.compile()`.
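
The `{explanations_md}` and `{aten_ops_md}` placeholders suggest README.md is regenerated from this template; a hedged sketch of that substitution, assuming plain `str.format()` (the repo's actual generator script may well do this differently):

```python
from pathlib import Path

# Hypothetical rendering step: assumes the placeholders in docs/README.md.in
# are filled with str.format(); variable contents below are stand-ins.
explanations_md = "..."   # generated "Explanation of Metrics" markdown
aten_ops_md = "..."       # generated per-model aten op status tables

template = Path("docs/README.md.in").read_text()
readme = template.format(explanations_md=explanations_md, aten_ops_md=aten_ops_md)
Path("README.md").write_text(readme)
```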

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
@@ -5,3 +5,4 @@ pre-commit==3.0.4
transformers==4.38.0
pandas==2.0.3
Pillow==10.3.0
+ pydantic==2.8.2
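
The new `pydantic==2.8.2` pin backs the commit's switch to generating metrics data with pydantic models. A hedged sketch of what such a per-model record could look like (field names are hypothetical; the real schema lives in a separate schema file per the commit message):

```python
from typing import Optional

from pydantic import BaseModel


# Hypothetical shape of a per-model metrics record; field names are assumptions
# for illustration, not the schema actually added in this commit.
class ModelMetrics(BaseModel):
    name: str                                     # e.g. "ResNet18"
    run_success: bool                             # whether the compiled run finished
    torch_ops_before: Optional[int] = None        # total ops in the original Torch graph
    torch_ops_remain: Optional[int] = None        # ops left after TTNN conversion
    to_from_device_ops: Optional[int] = None      # number of to/from_device transfers
    original_run_time_ms: Optional[float] = None
    compiled_run_time_ms: Optional[float] = None
    accuracy_pct: Optional[float] = None


# Pydantic v2 (2.8.2 as pinned) serializes a record with model_dump_json().
record = ModelMetrics(name="ResNet18", run_success=True, original_run_time_ms=1772.4)
print(record.model_dump_json(indent=2))
```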

tests/models/bert/test_bert.py

Lines changed: 3 additions & 1 deletion
@@ -47,7 +47,9 @@ def decode_output(outputs):
answer_before = decode_output(outputs_before)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run inference with the compiled model

tests/models/bloom/test_bloom.py

Lines changed: 3 additions & 1 deletion
@@ -35,7 +35,9 @@ def decode_output(outputs):
decoded_output_before = decode_output(outputs_before)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run inference with the compiled model

tests/models/falcon/test_falcon.py

Lines changed: 3 additions & 1 deletion
@@ -35,7 +35,9 @@ def decode_output(outputs):
decoded_output_before = decode_output(outputs_before)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run inference with the compiled model

tests/models/gpt2/test_gpt2.py

Lines changed: 3 additions & 1 deletion
@@ -36,7 +36,9 @@ def decode_output(outputs):
decoded_output_before = decode_output(outputs_before)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run inference with the compiled model

tests/models/llama/test_llama.py

Lines changed: 3 additions & 1 deletion
@@ -35,7 +35,9 @@ def decode_output(outputs):
decoded_output_before = decode_output(outputs_before)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run inference with the compiled model

tests/models/mnist/test_mnist.py

Lines changed: 6 additions & 2 deletions
@@ -57,7 +57,9 @@ def test_mnist_train(device):
)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run train with the compiled model
@@ -93,7 +95,9 @@ def test_mnist_eval(device):
)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run inference with the compiled model

tests/models/resnet/test_resnet.py

Lines changed: 3 additions & 1 deletion
@@ -23,7 +23,9 @@ def test_resnet(device):
)

# Compile the model
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
option.gen_graphviz = True
model = torch.compile(model, backend=torch_ttnn.backend, options=option)

tests/models/yolos/test_yolos.py

Lines changed: 3 additions & 1 deletion
@@ -42,7 +42,9 @@ def decode_output(outputs):
decoded_output_before = decode_output(outputs_before)

# Compile model with ttnn backend
- option = torch_ttnn.TorchTtnnOption(device=device, metrics_path=metrics_path)
+ option = torch_ttnn.TorchTtnnOption(
+     device=device, gen_graphviz=True, metrics_path=metrics_path
+ )
m = torch.compile(m, backend=torch_ttnn.backend, options=option)

# Run inference with the compiled model
