
Commit 6f1debc

metascroy authored and malfet committed
various fixes to make et_export and et_wrapper work
1 parent 2f29dc3 commit 6f1debc

File tree

3 files changed: +27 -12 lines changed


README.md

Lines changed: 12 additions & 5 deletions
````diff
@@ -6,8 +6,8 @@ For a list of devices, see below, under *SUPPORTED SYSTEMS*
 
 A goal of this repo, and the design of the PT2 components was to offer seamless integration and consistent workflows.
 Both mobile and server/desktop paths start with torch.export() receiving the same model description. Similarly,
-integration into runners for Python (for initial testing) and Python-free environments (for deployment, in runner-posix
-and runner-mobile, respectively) offer very consistent experiences across backends and offer developers consistent interfaces
+integration into runners for Python (for initial testing) and Python-free environments (for deployment, in runner-aoti
+and runner-et, respectively) offer a consistent experience across backends and offer developers consistent interfaces
 and user experience whether they target server, desktop or mobile & edge use cases, and/or all of them.
 
 
````
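As a companion to the paragraph in the hunk above, here is a minimal sketch of the shared torch.export() entry point both paths start from; TinyModel and its input shape are hypothetical stand-ins for the llama model this repo actually exports:

```python
import torch
from torch.export import export

# Hypothetical stand-in for the llama model this repo actually exports.
class TinyModel(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(x) * 2

# Both the runner-aoti (server/desktop) and runner-et (mobile) paths
# begin from the same ExportedProgram captured here.
exported_program = export(TinyModel(), (torch.randn(4),))
print(exported_program.graph_module.code)  # the captured graph, as Python
```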

````diff
@@ -85,12 +85,14 @@ The environment variable MODEL_REPO should point to a directory with the `model.
 The command below will add the file "llama-fast.pte" to your MODEL_REPO directory.
 
 ```
-python et_export.py --checkpoint_path $MODEL_REPO/model.pth -d fp32 --xnnpack --out-path ${MODEL_REPO}
+python et_export.py --checkpoint_path $MODEL_REPO/model.pth -d fp32 --out-path ${MODEL_REPO}
 ```
 
-How do run is problematic -- I would love to run it with
+TODO(fix this): the export command works with "--xnnpack" flag, but the next generate.py command will not run it so we do not set it right now.
+
+To run the pte file, run this. Note that this is very slow at the moment.
 ```
-python generate.py --pte ./${MODEL_REPO}.pte --prompt "Hello my name is" --device cpu
+python generate.py --checkpoint_path $MODEL_REPO/model.pth --pte $MODEL_REPO/llama-fast.pte --prompt "Hello my name is" --device cpu
 ```
 but *that requires xnnpack to work in python!*
 
````
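For context, the generate.py invocation above loads the .pte through ExecuTorch's Python bindings, the same path et_wrapper.py below uses. A minimal sketch of that load-and-run flow; the portable_lib module path is assumed (the diff only shows it aliased as `exec_lib`), and llama-fast.pte is a placeholder:

```python
import torch
# Module path assumed; et_wrapper.py only shows it aliased as `exec_lib`.
from executorch.extension.pybindings import portable_lib as exec_lib

# Load the serialized ExecuTorch program (path is a placeholder).
module = exec_lib._load_for_executorch("llama-fast.pte")

# Inputs go in as a tuple; outputs come back as a list of tensors,
# matching how PTEModel.forward calls the module below.
tokens = torch.tensor([[1, 2, 3]], dtype=torch.long)
input_pos = torch.tensor([0, 1, 2], dtype=torch.long)
(logits,) = module.forward((tokens, input_pos))
```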

````diff
@@ -233,6 +235,11 @@ List dependencies for these backends
 ### ExecuTorch
 Set up executorch by following the instructions [here](https://pytorch.org/executorch/stable/getting-started-setup.html#setting-up-executorch).
 
+Make sure when you run the installation script in the executorch repo, you enable pybind.
+```
+./install_requirements.sh --pybind
+```
+
 
 
 # Acknowledgements
````
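The --pybind flag matters because generate.py's --pte path relies on ExecuTorch's Python bindings. A quick sanity check (module path assumed, as above) that the bindings were built:

```python
# If this import fails, re-run ./install_requirements.sh --pybind.
from executorch.extension.pybindings import portable_lib

print("pybindings OK:", hasattr(portable_lib, "_load_for_executorch"))
```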

et_export.py

Lines changed: 5 additions & 1 deletion
```diff
@@ -172,7 +172,11 @@ def export_model(model, device, output_path, args=None) -> str:  # noqa: C901
         )
     )
 
-    save_pte_program(export_program, "llama-fast", output_path)
+    print("The methods are: ", export_program.methods)
+    path = f"{output_path}/llama-fast.pte"
+    with open(path, "wb") as f:
+        export_program.write_to_file(f)
+    # save_pte_program(export_program, "llama-fast", output_path)
 
     return output_path
 
```
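For orientation, export_program here is the ExecuTorch program object produced earlier in export_model. A hedged sketch of how such an object is typically built with the standard torch.export + exir pipeline, not this repo's exact code:

```python
import torch
from torch.export import export
from executorch.exir import to_edge

# Hypothetical model standing in for the repo's llama Transformer.
class TinyModel(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + 1

# Capture, lower to the edge dialect, then to an ExecuTorch program.
exported = export(TinyModel(), (torch.randn(4),))
executorch_program = to_edge(exported).to_executorch()

# Same serialization call the diff above adds.
with open("llama-fast.pte", "wb") as f:
    executorch_program.write_to_file(f)
```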

et_wrapper.py

Lines changed: 10 additions & 6 deletions
```diff
@@ -9,13 +9,17 @@ class PTEModel(nn.Module):
     def __init__(self, config, path) -> None:
         super().__init__()
         self.config = config
-        self.model_ = exec_lib._load_for_executorch(path)
+        self.model_ = exec_lib._load_for_executorch(str(path))
 
-    def ccorward(self, x, input_pos):
-        logits = module.forward(
-            x.to(torch.long),
-            input_pos.to(torch.long),
-        )
+    def forward(self, x, input_pos):
+        # model_.forward expects inputs to be wrapped in a tuple
+        forward_inputs = (x.to(torch.long), input_pos.to(torch.long))
+        logits = self.model_.forward(forward_inputs)
+
+        # After wrapping in a tuple, we get a list back, so we need to grab
+        # the first element to get the tensor
+        assert len(logits) == 1
+        logits = logits[0]
         return logits
 
     def setup_caches(self, max_batch_size, max_seq_length):
```
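A short usage sketch of the fixed wrapper; config and the .pte path are hypothetical stand-ins for the repo's real objects:

```python
import torch
from et_wrapper import PTEModel

# config is whatever model config the repo passes in; None works for
# this sketch since PTEModel only stores it.
model = PTEModel(config=None, path="llama-fast.pte")

tokens = torch.tensor([[1, 2, 3]], dtype=torch.long)
input_pos = torch.tensor([0, 1, 2], dtype=torch.long)

# nn.Module.__call__ dispatches to the corrected forward, which wraps
# the inputs in a tuple and unwraps the single-tensor result.
logits = model(tokens, input_pos)
```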
