Skip to content

Commit 8b92866

Browse files
committed
Added support for modules with graph breaks
1 parent 44b0709 commit 8b92866

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

py/torch_tensorrt/dynamo/_compiler.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
pre_export_lowering,
3838
)
3939
from torch_tensorrt.dynamo.utils import (
40+
CPU_DEVICE,
4041
get_flat_args_with_check,
4142
get_output_metadata,
4243
parse_graph_io,
@@ -676,7 +677,6 @@ def compile(
676677

677678
gm = exported_program.module()
678679
# Move the weights in the state_dict to CPU
679-
exported_program.module().to("cpu")
680680
logger.info(
681681
"The model is moved to CPU during compilation. If you want to keep the model on GPU, call module.to('cuda') on the model after compilation."
682682
)
@@ -686,6 +686,7 @@ def compile(
686686
gm = post_lowering(gm, settings)
687687
logger.debug("Lowered Input graph: " + str(gm.graph))
688688

689+
exported_program.module().to(CPU_DEVICE)
689690
trt_gm = compile_module(
690691
gm, trt_arg_inputs, trt_kwarg_inputs, settings, engine_cache
691692
)
@@ -830,6 +831,7 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
830831
str(name),
831832
str(submodule.graph),
832833
)
834+
submodule.to(torch.cuda.current_device())
833835
continue
834836

835837
if name not in submodule_node_dict:

0 commit comments

Comments
 (0)