From aae7692dc5e629d89c9a95730eb798ca37827215 Mon Sep 17 00:00:00 2001 From: Evan Li Date: Tue, 11 Jun 2024 13:08:00 -0700 Subject: [PATCH 1/3] add example of Phi 3 vision model --- examples/dynamo/README.rst | 1 + examples/dynamo/torch_compile_phi3_vision.py | 65 ++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 examples/dynamo/torch_compile_phi3_vision.py diff --git a/examples/dynamo/README.rst b/examples/dynamo/README.rst index c4d2baf0e4..d24ddd3845 100644 --- a/examples/dynamo/README.rst +++ b/examples/dynamo/README.rst @@ -22,3 +22,4 @@ Model Zoo * :ref:`_torch_export_llama2`: Compiling a Llama2 model using AOT workflow (`ir=dynamo`) * :ref:`_torch_export_sam2`: Compiling SAM2 model using AOT workflow (`ir=dynamo`) * :ref:`_torch_export_flux_dev`: Compiling FLUX.1-dev model using AOT workflow (`ir=dynamo`) +* :ref:`_torch_compile_phi3_vision`: Compiling a Phi 3 vision model from Hugging Face using ``torch.compile`` \ No newline at end of file diff --git a/examples/dynamo/torch_compile_phi3_vision.py b/examples/dynamo/torch_compile_phi3_vision.py new file mode 100644 index 0000000000..9d77d32b32 --- /dev/null +++ b/examples/dynamo/torch_compile_phi3_vision.py @@ -0,0 +1,65 @@ +""" +.. _torch_compile_phi3_vision: + +Compiling Phi 3 vision model from Hugging Face using the Torch-TensorRT `torch.compile` Backend +====================================================== + +This script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a Phi 3 vision model from Hugging Face. 
+""" + +# %% +# Imports and Model Definition +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +import requests +import torch +import torch_tensorrt +from PIL import Image +from transformers import AutoModelForCausalLM, AutoProcessor + +# %% +# Load the pre-trained model weights from Hugging Face +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +model_id = "microsoft/Phi-3-vision-128k-instruct" +processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) +model = AutoModelForCausalLM.from_pretrained( + model_id, trust_remote_code=True, torch_dtype="auto" +).cuda() + +# %% +# Compile the model with torch.compile, using Torch-TensorRT backend +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +model = torch.compile(model, backend="tensorrt") + +# %% +# Write prompt and load image +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +user_prompt = "<|user|>\n" +assistant_prompt = "<|assistant|>\n" +prompt_suffix = "<|end|>\n" + +# single-image prompt +prompt = f"{user_prompt}<|image_1|>\nWhat is shown in this image?{prompt_suffix}{assistant_prompt}" +url = "https://www.ilankelman.org/stopsigns/australia.jpg" +print(f">>> Prompt\n{prompt}") + +image = Image.open(requests.get(url, stream=True).raw) +inputs = processor(prompt, image, return_tensors="pt").to("cuda:0") + +# %% +# Inference +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +generate_ids = model.generate( + **inputs, + max_new_tokens=1000, + eos_token_id=processor.tokenizer.eos_token_id, +) +generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :] +response = processor.batch_decode( + generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False +)[0] +print(f">>> Response\n{response}") From 27e74762dafc0ef4ab9109ee506837fdbf711922 Mon Sep 17 00:00:00 2001 From: Evan Li Date: Wed, 19 Mar 2025 11:42:34 -0700 Subject: [PATCH 2/3] support phi4 --- examples/dynamo/README.rst | 2 +- ...torch_compile_phi3_vision.py => torch_compile_phi4.py} | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename examples/dynamo/{torch_compile_phi3_vision.py => 
torch_compile_phi4.py} (87%) diff --git a/examples/dynamo/README.rst b/examples/dynamo/README.rst index d24ddd3845..17204da2ec 100644 --- a/examples/dynamo/README.rst +++ b/examples/dynamo/README.rst @@ -18,8 +18,8 @@ Model Zoo * :ref:`torch_compile_transformer`: Compiling a Transformer model using ``torch.compile`` * :ref:`torch_compile_stable_diffusion`: Compiling a Stable Diffusion model using ``torch.compile`` * :ref:`_torch_compile_gpt2`: Compiling a GPT2 model using ``torch.compile`` +* :ref:`torch_compile_phi4`: Compiling a Phi 4 model from Hugging Face using ``torch.compile`` * :ref:`_torch_export_gpt2`: Compiling a GPT2 model using AOT workflow (`ir=dynamo`) * :ref:`_torch_export_llama2`: Compiling a Llama2 model using AOT workflow (`ir=dynamo`) * :ref:`_torch_export_sam2`: Compiling SAM2 model using AOT workflow (`ir=dynamo`) * :ref:`_torch_export_flux_dev`: Compiling FLUX.1-dev model using AOT workflow (`ir=dynamo`) -* :ref:`_torch_compile_phi3_vision`: Compiling a Phi 3 vision model from Hugging Face using ``torch.compile`` \ No newline at end of file diff --git a/examples/dynamo/torch_compile_phi3_vision.py b/examples/dynamo/torch_compile_phi4.py similarity index 87% rename from examples/dynamo/torch_compile_phi3_vision.py rename to examples/dynamo/torch_compile_phi4.py index 9d77d32b32..629f026adf 100644 --- a/examples/dynamo/torch_compile_phi3_vision.py +++ b/examples/dynamo/torch_compile_phi4.py @@ -1,10 +1,10 @@ """ -.. _torch_compile_phi3_vision: +.. _torch_compile_phi4: -Compiling Phi 3 vision model from Hugging Face using the Torch-TensorRT `torch.compile` Backend +Compiling Phi 4 model from Hugging Face using the Torch-TensorRT `torch.compile` Backend ====================================================== -This script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a Phi 3 vision model from Hugging Face. 
+This script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a Phi 4 model from Hugging Face. """ # %% @@ -21,7 +21,7 @@ # Load the pre-trained model weights from Hugging Face # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -model_id = "microsoft/Phi-3-vision-128k-instruct" +model_id = "microsoft/Phi-4-multimodal-instruct" processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( model_id, trust_remote_code=True, torch_dtype="auto" From 93004289e2cd486b73350502b0325907351a5869 Mon Sep 17 00:00:00 2001 From: Evan Li Date: Thu, 20 Mar 2025 11:34:25 -0700 Subject: [PATCH 3/3] fix: put model in eval mode and compile only model.forward with the TensorRT backend --- examples/dynamo/torch_compile_phi4.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/examples/dynamo/torch_compile_phi4.py b/examples/dynamo/torch_compile_phi4.py index 629f026adf..0c90702ac9 100644 --- a/examples/dynamo/torch_compile_phi4.py +++ b/examples/dynamo/torch_compile_phi4.py @@ -23,15 +23,23 @@ model_id = "microsoft/Phi-4-multimodal-instruct" processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) -model = AutoModelForCausalLM.from_pretrained( - model_id, trust_remote_code=True, torch_dtype="auto" -).cuda() +model = ( + AutoModelForCausalLM.from_pretrained( + model_id, trust_remote_code=True, torch_dtype="auto" + ) + .eval() + .cuda() +) # %% # Compile the model with torch.compile, using Torch-TensorRT backend # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -model = torch.compile(model, backend="tensorrt") +model.forward = torch.compile( + model.forward, + backend="tensorrt", + options={"debug": True, "min_block_size": 1, "use_python_runtime": True}, +) # %% # Write prompt and load image