diff --git a/examples/dynamo/README.rst b/examples/dynamo/README.rst
index c4d2baf0e4..17204da2ec 100644
--- a/examples/dynamo/README.rst
+++ b/examples/dynamo/README.rst
@@ -18,6 +18,7 @@ Model Zoo
 * :ref:`torch_compile_transformer`: Compiling a Transformer model using ``torch.compile``
 * :ref:`torch_compile_stable_diffusion`: Compiling a Stable Diffusion model using ``torch.compile``
 * :ref:`_torch_compile_gpt2`: Compiling a GPT2 model using ``torch.compile``
+* :ref:`torch_compile_phi4`: Compiling a Phi-4 model from Hugging Face using ``torch.compile``
 * :ref:`_torch_export_gpt2`: Compiling a GPT2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_llama2`: Compiling a Llama2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_sam2`: Compiling SAM2 model using AOT workflow (`ir=dynamo`)
diff --git a/examples/dynamo/torch_compile_phi4.py b/examples/dynamo/torch_compile_phi4.py
new file mode 100644
index 0000000000..0c90702ac9
--- /dev/null
+++ b/examples/dynamo/torch_compile_phi4.py
@@ -0,0 +1,87 @@
+"""
+.. _torch_compile_phi4:
+
+Compiling Phi-4 model from Hugging Face using the Torch-TensorRT ``torch.compile`` Backend
+===========================================================================================
+
+This script is intended as a sample of the Torch-TensorRT workflow with ``torch.compile`` on a Phi-4 multimodal model from Hugging Face.
+"""
+
+# %%
+# Imports and Model Definition
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+import requests
+import torch
+import torch_tensorrt  # registers the "tensorrt" backend for torch.compile
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor
+
+# %%
+# Load the pre-trained model weights from Hugging Face
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+model_id = "microsoft/Phi-4-multimodal-instruct"
+processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+model = (
+    AutoModelForCausalLM.from_pretrained(
+        model_id, trust_remote_code=True, torch_dtype="auto"
+    )
+    .eval()
+    .cuda()
+)
+
+# %%
+# Compile the model with torch.compile, using the Torch-TensorRT backend
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# ``min_block_size=1`` allows even single-operator subgraphs to be converted
+# to TensorRT, ``use_python_runtime=True`` runs the compiled engines through
+# the Python runtime, and ``debug=True`` enables verbose compilation logging.
+
+model.forward = torch.compile(
+    model.forward,
+    backend="tensorrt",
+    options={"debug": True, "min_block_size": 1, "use_python_runtime": True},
+)
+
+# %%
+# Write prompt and load image
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+user_prompt = "<|user|>\n"
+assistant_prompt = "<|assistant|>\n"
+prompt_suffix = "<|end|>\n"
+
+# single-image prompt
+prompt = f"{user_prompt}<|image_1|>\nWhat is shown in this image?{prompt_suffix}{assistant_prompt}"
+url = "https://www.ilankelman.org/stopsigns/australia.jpg"
+print(f">>> Prompt\n{prompt}")
+
+image = Image.open(requests.get(url, stream=True).raw)
+inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda:0")
+
+# %%
+# Inference
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+generate_ids = model.generate(
+    **inputs,
+    max_new_tokens=1000,
+    eos_token_id=processor.tokenizer.eos_token_id,
+)
+# Strip the prompt tokens so that only the newly generated response is decoded
+generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
+response = processor.batch_decode(
+    generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+)[0]
+print(f">>> Response\n{response}")
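+
+# %%
+# Cleanup
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+# Optional: reset the torch.compile state. ``torch._dynamo.reset()`` clears
+# compiled graphs and caches, so that the model can be recompiled with
+# different backend options in the same process.
+torch._dynamo.reset()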