
Commit aae7692

add example of Phi 3 vision model
1 parent f699c46 commit aae7692

2 files changed: 66 additions, 0 deletions


examples/dynamo/README.rst (+1)
@@ -22,3 +22,4 @@ Model Zoo
 * :ref:`_torch_export_llama2`: Compiling a Llama2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_sam2`: Compiling SAM2 model using AOT workflow (`ir=dynamo`)
 * :ref:`_torch_export_flux_dev`: Compiling FLUX.1-dev model using AOT workflow (`ir=dynamo`)
+* :ref:`_torch_compile_phi3_vision`: Compiling a Phi 3 vision model from Hugging Face using ``torch.compile``
New file (+65)
@@ -0,0 +1,65 @@
"""
.. _torch_compile_phi3_vision:

Compiling a Phi 3 vision model from Hugging Face using the Torch-TensorRT `torch.compile` Backend
==================================================================================================

This script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a Phi 3 vision model from Hugging Face.
"""

# %%
# Imports and Model Definition
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

import requests
import torch
import torch_tensorrt
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

# %%
# Load the pre-trained model weights from Hugging Face
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

model_id = "microsoft/Phi-3-vision-128k-instruct"
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, torch_dtype="auto"
).cuda()
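
# %%
# A note on precision: ``torch_dtype="auto"`` adopts whatever dtype the
# checkpoint's config records. As an illustrative variant (an assumption of
# this sketch, not required by the workflow), one could instead pin the
# precision with ``torch_dtype=torch.float16``. The line below simply reports
# what was actually loaded.
print(f"Loaded model dtype: {next(model.parameters()).dtype}")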

# %%
# Compile the model with torch.compile, using Torch-TensorRT backend
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

model = torch.compile(model, backend="tensorrt")
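
# %%
# The ``tensorrt`` backend also accepts an ``options`` dict for tuning
# compilation; a minimal sketch follows. The keys shown are illustrative
# Torch-TensorRT dynamo settings and should be verified against the
# installed Torch-TensorRT version.
#
# model = torch.compile(
#     model,
#     backend="tensorrt",
#     options={
#         "enabled_precisions": {torch.half},  # permit FP16 TensorRT engines
#         "min_block_size": 1,  # smallest subgraph eligible for conversion
#     },
# )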
35+
36+
# %%
37+
# Write prompt and load image
38+
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
39+
40+
user_prompt = "<|user|>\n"
41+
assistant_prompt = "<|assistant|>\n"
42+
prompt_suffix = "<|end|>\n"
43+
44+
# single-image prompt
45+
prompt = f"{user_prompt}<|image_1|>\nWhat is shown in this image?{prompt_suffix}{assistant_prompt}"
46+
url = "https://www.ilankelman.org/stopsigns/australia.jpg"
47+
print(f">>> Prompt\n{prompt}")
48+
49+
image = Image.open(requests.get(url, stream=True).raw)
50+
inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
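
# %%
# A quick sanity check (illustrative): the processor returns token IDs plus
# the preprocessed image tensors. Exact key names can vary across
# ``transformers`` versions, so we just print whatever came back.
print({k: tuple(v.shape) for k, v in inputs.items()})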

# %%
# Inference
# ^^^^^^^^^

generate_ids = model.generate(
    **inputs,
    max_new_tokens=1000,
    eos_token_id=processor.tokenizer.eos_token_id,
)
generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
response = processor.batch_decode(
    generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(f">>> Response\n{response}")
