Skip to content

Commit af6eb74

Browse files
committed
slight code reorg and bug correction for cross_compile
1 parent cb3bd17 commit af6eb74

File tree

3 files changed

+23
-29
lines changed

3 files changed

+23
-29
lines changed

py/torch_tensorrt/dynamo/_compiler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1206,7 +1206,7 @@ def save_cross_compiled_exported_program(
12061206

12071207
from torch_tensorrt.dynamo._exporter import export
12081208

1209-
exp_program = export(gm, cross_compile_flag=True)
1209+
exp_program = export(gm, cross_compile_module=True)
12101210
torch.export.save(exp_program, file_path)
12111211
logger.debug(f"successfully saved the module for windows at {file_path}")
12121212

py/torch_tensorrt/dynamo/_exporter.py

+21-28
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,23 @@
2222

2323
def export(
2424
gm: torch.fx.GraphModule,
25-
cross_compile_flag: Optional[bool] = False,
25+
cross_compile_module: Optional[bool] = False,
2626
) -> ExportedProgram:
2727
"""Export the result of TensorRT compilation into the desired output format.
2828
2929
Arguments:
3030
gm (torch.fx.GraphModule): Compiled Torch-TensorRT module, generated by ``torch_tensorrt.dynamo.compile``
3131
inputs (torch.Tensor): Torch input tensors
32-
cross_compile_flag (bool): Flag to indicated whether it is cross_compilation enabled or not
32+
cross_compile_module (bool): Flag to indicate whether cross-compilation is enabled or not
3333
"""
34-
patched_module = transform(gm, cross_compile_flag)
34+
patched_module = transform(gm, cross_compile_module)
3535
exp_program = create_trt_exp_program(patched_module)
3636
return exp_program
3737

3838

3939
def transform(
4040
gm: torch.fx.GraphModule,
41-
cross_compile_flag: Optional[bool] = False,
41+
cross_compile_module: Optional[bool] = False,
4242
) -> torch.fx.GraphModule:
4343
"""
4444
Transforms the graphmodule by inlining Pytorch and TensorRT submodules.
@@ -48,7 +48,7 @@ def transform(
4848
Arguments:
4949
gm (torch.fx.GraphModule): Compiled Torch-TensorRT module, generated by ``torch_tensorrt.dynamo.compile``
5050
inputs (torch.Tensor): Torch input tensors
51-
cross_compile_flag (bool): Flag to indicated whether it is cross_compilation enabled or not
51+
cross_compile_module (bool): Flag to indicate whether cross-compilation is enabled or not
5252
5353
Returns an inlined torch.fx.GraphModule
5454
"""
@@ -57,7 +57,7 @@ def transform(
5757
gm = copy.deepcopy(gm)
5858

5959
# Inline TensorRT submodules
60-
inline_trt_modules(gm, cross_compile_flag)
60+
inline_trt_modules(gm, cross_compile_module)
6161

6262
# Inline pytorch submodules
6363
inline_torch_modules(gm)
@@ -356,7 +356,7 @@ def create_trt_exp_program(
356356

357357

358358
def inline_trt_modules(
359-
gm: torch.fx.GraphModule, cross_compile_flag: Optional[bool] = False
359+
gm: torch.fx.GraphModule, cross_compile_module: Optional[bool] = False
360360
) -> torch.fx.GraphModule:
361361
"""
362362
Replace TRT submodules with trt engine nodes.
@@ -380,7 +380,16 @@ def inline_trt_modules(
380380
num_outputs = len(trt_module_node.meta["val"])
381381
# Insert a call_function node to perform inference on TRT engine
382382
with gm.graph.inserting_before(trt_module_node):
383-
if not cross_compile_flag:
383+
if cross_compile_module:
384+
engine_info = trt_module._pack_engine_info()
385+
engine_bytes = engine_info[ENGINE_IDX]
386+
engine_info[ENGINE_IDX] = base64.b64encode(engine_bytes).decode("utf-8")
387+
# insert the no-op placeholder node in the graph, which should be replaced with the actual execute_engine node when loaded on Windows
388+
trt_node = gm.graph.call_function(
389+
torch.ops.tensorrt.no_op_placeholder_for_execute_engine.default,
390+
(trt_module_node.args, *engine_info),
391+
)
392+
else:
384393
# for the normal workflow: use the execute_engine node
385394
engine_name = f"{name}_engine"
386395
setattr(gm, engine_name, trt_module.engine)
@@ -396,16 +405,6 @@ def inline_trt_modules(
396405
engine_node.meta["val"] = CustomObjArgument(
397406
name=engine_node.name, class_fqn=""
398407
)
399-
else:
400-
# for the cross compile for windows workflow: use the no_op_placeholder node
401-
engine_info = trt_module._pack_engine_info()
402-
engine_bytes = engine_info[ENGINE_IDX]
403-
engine_info[ENGINE_IDX] = base64.b64encode(engine_bytes).decode("utf-8")
404-
# insert the no_placeholder node in the graph which should be replaced to the actual execute_engine node while load in the windows
405-
trt_node = gm.graph.call_function(
406-
torch.ops.tensorrt.no_op_placeholder_for_execute_engine.default,
407-
(trt_module_node.args, *engine_info),
408-
)
409408
# set trt_node.meta with trt_module_node.meta
410409
assert num_outputs > 0
411410
trt_node.meta["val"] = trt_module_node.meta["val"]
@@ -464,16 +463,10 @@ def replace_execute_engine_no_op_node(
464463
name=engine_node.name, class_fqn=""
465464
)
466465

467-
if len(no_op_placeholder_node.meta["val"]) == 1:
468-
with gm.graph.inserting_after(trt_node):
469-
getitem_output = gm.graph.call_function(operator.getitem, (trt_node, 0))
470-
getitem_output.meta["val"] = trt_node.meta["val"]
471-
no_op_placeholder_node.replace_all_uses_with(getitem_output)
472-
else:
473-
no_op_placeholder_node.replace_all_uses_with(trt_node)
474-
getitem_nodes = trt_node.users
475-
for idx, getitem_node in enumerate(getitem_nodes):
476-
getitem_node.meta["val"] = trt_node.meta["val"][idx]
466+
no_op_placeholder_node.replace_all_uses_with(trt_node)
467+
getitem_nodes = trt_node.users
468+
for idx, getitem_node in enumerate(getitem_nodes):
469+
getitem_node.meta["val"] = trt_node.meta["val"][idx]
477470

478471
gm.graph.erase_node(no_op_placeholder_node)
479472

py/torch_tensorrt/runtime/_utils.py

+1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def no_op_placeholder_for_execute_engine(
144144
serialized_hardware_compatible: str,
145145
serialized_metadata: str,
146146
serialized_target_platform: str,
147+
serialized_require_output_allocator: str,
147148
) -> List[torch.Tensor]:
148149
raise RuntimeError(
149150
"The saved model is cross compiled for windows in Linux, should only be loaded in Windows via torch_tensorrt.load_cross_compiled_exported_program() api."

0 commit comments

Comments
 (0)