From aae7692dc5e629d89c9a95730eb798ca37827215 Mon Sep 17 00:00:00 2001
From: Evan Li <zewenl@nvidia.com>
Date: Tue, 11 Jun 2024 13:08:00 -0700
Subject: [PATCH 1/3] add example of Phi 3 vision model

---
 examples/dynamo/README.rst                   |  1 +
 examples/dynamo/torch_compile_phi3_vision.py | 65 ++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 examples/dynamo/torch_compile_phi3_vision.py

diff --git a/examples/dynamo/README.rst b/examples/dynamo/README.rst
index c4d2baf0e4..d24ddd3845 100644
--- a/examples/dynamo/README.rst
+++ b/examples/dynamo/README.rst
@@ -22,3 +22,4 @@ Model Zoo
 * :ref:`_torch_export_llama2`: Compiling a Llama2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_sam2`: Compiling SAM2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_flux_dev`: Compiling FLUX.1-dev model using AOT workflow (`ir=dynamo`)
+* :ref:`_torch_compile_phi3_vision`: Compiling a Phi 3 vision model from Hugging Face using ``torch.compile``
\ No newline at end of file
diff --git a/examples/dynamo/torch_compile_phi3_vision.py b/examples/dynamo/torch_compile_phi3_vision.py
new file mode 100644
index 0000000000..9d77d32b32
--- /dev/null
+++ b/examples/dynamo/torch_compile_phi3_vision.py
@@ -0,0 +1,65 @@
+"""
+.. _torch_compile_phi3_vision:
+
+Compiling Phi 3 vision model from Hugging Face using the Torch-TensorRT `torch.compile` Backend
+======================================================
+
+This script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a Phi 3 vision model from Hugging Face.
+"""
+
+# %%
+# Imports and Model Definition
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+import requests
+import torch
+import torch_tensorrt
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor
+
+# %%
+# Load the pre-trained model weights from Hugging Face
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+model_id = "microsoft/Phi-3-vision-128k-instruct"
+processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id, trust_remote_code=True, torch_dtype="auto"
+).cuda()
+
+# %%
+# Compile the model with torch.compile, using Torch-TensorRT backend
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+model = torch.compile(model, backend="tensorrt")
+
+# %%
+# Write prompt and load image
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+user_prompt = "<|user|>\n"
+assistant_prompt = "<|assistant|>\n"
+prompt_suffix = "<|end|>\n"
+
+# single-image prompt
+prompt = f"{user_prompt}<|image_1|>\nWhat is shown in this image?{prompt_suffix}{assistant_prompt}"
+url = "https://www.ilankelman.org/stopsigns/australia.jpg"
+print(f">>> Prompt\n{prompt}")
+
+image = Image.open(requests.get(url, stream=True).raw)
+inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
+
+# %%
+# Inference
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+generate_ids = model.generate(
+    **inputs,
+    max_new_tokens=1000,
+    eos_token_id=processor.tokenizer.eos_token_id,
+)
+generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
+response = processor.batch_decode(
+    generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+)[0]
+print(f">>> Response\n{response}")

From 27e74762dafc0ef4ab9109ee506837fdbf711922 Mon Sep 17 00:00:00 2001
From: Evan Li <zewenl@nvidia.com>
Date: Wed, 19 Mar 2025 11:42:34 -0700
Subject: [PATCH 2/3] examples: rename Phi 3 vision example to Phi 4 multimodal

---
 examples/dynamo/README.rst                                | 2 +-
 ...torch_compile_phi3_vision.py => torch_compile_phi4.py} | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename examples/dynamo/{torch_compile_phi3_vision.py => torch_compile_phi4.py} (87%)

diff --git a/examples/dynamo/README.rst b/examples/dynamo/README.rst
index d24ddd3845..17204da2ec 100644
--- a/examples/dynamo/README.rst
+++ b/examples/dynamo/README.rst
@@ -18,8 +18,8 @@ Model Zoo
 * :ref:`torch_compile_transformer`: Compiling a Transformer model using ``torch.compile``
 * :ref:`torch_compile_stable_diffusion`: Compiling a Stable Diffusion model using ``torch.compile``
 * :ref:`_torch_compile_gpt2`: Compiling a GPT2 model using ``torch.compile``
+* :ref:`torch_compile_phi4`: Compiling a Phi 4 model from Hugging Face using ``torch.compile``
 * :ref:`_torch_export_gpt2`: Compiling a GPT2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_llama2`: Compiling a Llama2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_sam2`: Compiling SAM2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_flux_dev`: Compiling FLUX.1-dev model using AOT workflow (`ir=dynamo`)
-* :ref:`_torch_compile_phi3_vision`: Compiling a Phi 3 vision model from Hugging Face using ``torch.compile``
\ No newline at end of file
diff --git a/examples/dynamo/torch_compile_phi3_vision.py b/examples/dynamo/torch_compile_phi4.py
similarity index 87%
rename from examples/dynamo/torch_compile_phi3_vision.py
rename to examples/dynamo/torch_compile_phi4.py
index 9d77d32b32..629f026adf 100644
--- a/examples/dynamo/torch_compile_phi3_vision.py
+++ b/examples/dynamo/torch_compile_phi4.py
@@ -1,10 +1,10 @@
 """
-.. _torch_compile_phi3_vision:
+.. _torch_compile_phi4:
 
-Compiling Phi 3 vision model from Hugging Face using the Torch-TensorRT `torch.compile` Backend
+Compiling a Phi 4 model from Hugging Face using the Torch-TensorRT ``torch.compile`` Backend
 ======================================================
 
-This script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a Phi 3 vision model from Hugging Face.
+This script is intended as a sample of the Torch-TensorRT workflow with ``torch.compile`` on a Phi 4 model from Hugging Face.
 """
 
 # %%
@@ -21,7 +21,7 @@
 # Load the pre-trained model weights from Hugging Face
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-model_id = "microsoft/Phi-3-vision-128k-instruct"
+model_id = "microsoft/Phi-4-multimodal-instruct"
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, torch_dtype="auto"

From 93004289e2cd486b73350502b0325907351a5869 Mon Sep 17 00:00:00 2001
From: Evan Li <zewenl@nvidia.com>
Date: Thu, 20 Mar 2025 11:34:25 -0700
Subject: [PATCH 3/3] phi4 example: set eval mode and compile model.forward with TensorRT options

---
 examples/dynamo/torch_compile_phi4.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/examples/dynamo/torch_compile_phi4.py b/examples/dynamo/torch_compile_phi4.py
index 629f026adf..0c90702ac9 100644
--- a/examples/dynamo/torch_compile_phi4.py
+++ b/examples/dynamo/torch_compile_phi4.py
@@ -23,15 +23,23 @@
 
 model_id = "microsoft/Phi-4-multimodal-instruct"
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    model_id, trust_remote_code=True, torch_dtype="auto"
-).cuda()
+model = (
+    AutoModelForCausalLM.from_pretrained(
+        model_id, trust_remote_code=True, torch_dtype="auto"
+    )
+    .eval()
+    .cuda()
+)
 
 # %%
 # Compile the model with torch.compile, using Torch-TensorRT backend
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-model = torch.compile(model, backend="tensorrt")
+model.forward = torch.compile(
+    model.forward,
+    backend="tensorrt",
+    options={"debug": True, "min_block_size": 1, "use_python_runtime": True},
+)
 
 # %%
 # Write prompt and load image