 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-from typing import Union
+from typing import Optional, Union
 import transformers
 from transformers.models.clip.modeling_clip import (
-    CLIPAttention as TransformersCLIPAttention,
-    CLIPEncoderLayer as TransformersCLIPEncoderLayer,
-    CLIPEncoder as TransformersCLIPEncoder,
+    CLIPAttention as HfCLIPAttention,
+    CLIPEncoderLayer as HfCLIPEncoderLayer,
+    CLIPEncoder as HfCLIPEncoder,
 )
 from os import PathLike
 import torch
 
 from ...types.theta import Theta, Dataset, torch_module_to_theta
-from ...types.tensors import DefaultPrimitiveTensor
 from ...layers.configs import ClipTextConfig
+from .clip import ClipTextModel
+from iree.turbine.aot import FxProgramsBuilder, export
 
 
-def transformers_clip_attention_to_theta(model: TransformersCLIPAttention) -> Theta:
+def hugging_face_clip_attention_to_theta(model: HfCLIPAttention) -> Theta:
     return torch_module_to_theta(model)
 
 
-def transformers_clip_encoder_layer_to_theta(model: TransformersCLIPEncoder) -> Theta:
+def hugging_face_clip_encoder_layer_to_theta(model: HfCLIPEncoder) -> Theta:
     return torch_module_to_theta(model)
 
 
-def transformers_clip_encoder_to_theta(model: TransformersCLIPEncoderLayer) -> Theta:
+def hugging_face_clip_encoder_to_theta(model: HfCLIPEncoderLayer) -> Theta:
     return torch_module_to_theta(model)
 
 
-def transformers_clip_text_model_to_theta(model: transformers.CLIPTextModel) -> Theta:
+def hugging_face_clip_text_model_to_theta(model: transformers.CLIPTextModel) -> Theta:
     return torch_module_to_theta(model)
 
 
-def transformers_clip_text_model_to_dataset(
+def hugging_face_clip_text_model_to_dataset(
     model: transformers.CLIPTextModel,
 ) -> Dataset:
-    config = ClipTextConfig.from_transformers_clip_text_config(model.config)
-    properties = config.as_properties()
-    theta = transformers_clip_text_model_to_theta(model)
+    config = ClipTextConfig.from_hugging_face_clip_text_model_config(model.config)
+    properties = config.to_properties()
+    theta = hugging_face_clip_text_model_to_theta(model)
     theta.rename_tensors_to_paths()
     return Dataset(properties, theta)
 
 
+def clip_text_model_to_dataset(model: ClipTextModel) -> Dataset:
+    return Dataset(properties=model.config.to_properties(), root_theta=model.theta)
+
+
 def export_clip_text_model_dataset_from_hugging_face(
     model_or_name_or_path: Union[str, PathLike, transformers.CLIPTextModel],
     output_path: Union[str, PathLike],
+    dtype: Optional[torch.dtype] = None,
 ):
     if isinstance(model_or_name_or_path, transformers.CLIPTextModel):
+        assert dtype is None
         model = model_or_name_or_path
     else:
-        model = transformers.CLIPTextModel.from_pretrained(model_or_name_or_path)
-    dataset = transformers_clip_text_model_to_dataset(model)
+        model = transformers.CLIPTextModel.from_pretrained(
+            model_or_name_or_path, torch_dtype=dtype
+        )
+    dataset = hugging_face_clip_text_model_to_dataset(model)
     dataset.save(output_path)
+
+
+def export_clip_text_model_mlir(
+    model: Union[ClipTextModel, PathLike],
+    batch_sizes: list[int],
+    mlir_output_path: str,
+):
+    """
+    Args:
+      model: either the torch module or path to GGUF/IRPA.
+    """
+    if not isinstance(model, ClipTextModel):
+        dataset = Dataset.load(model)
+        config = ClipTextConfig.from_properties(dataset.properties)
+        model = ClipTextModel(theta=dataset.root_theta, config=config)
+
+    fxb = FxProgramsBuilder(model)
+
+    for batch_size in batch_sizes:
+        sample_inputs = model.sample_inputs(batch_size)
+
+        @fxb.export_program(
+            name=f"forward_bs{batch_size}",
+            args=tuple(sample_inputs.values()),
+            dynamic_shapes=None,
+            strict=False,
+        )
+        def _(
+            model,
+            input_ids,
+        ):
+            return model(input_ids)
+
+    output = export(fxb, import_symbolic_shape_expressions=True)
+    output.save_mlir(mlir_output_path)
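
For reference, a minimal sketch of how the two export entry points added above might be chained together. The import path, model name, and file names below are illustrative assumptions, not part of this change:

import torch

# Assumed package path for this module; adjust to the actual sharktank layout.
from sharktank.models.clip.export import (
    export_clip_text_model_dataset_from_hugging_face,
    export_clip_text_model_mlir,
)

# 1. Fetch the Hugging Face CLIP text model (placeholder name) and save its
#    parameters and config as an IRPA dataset, optionally casting the dtype.
export_clip_text_model_dataset_from_hugging_face(
    "openai/clip-vit-large-patch14",  # placeholder model name
    "clip_text.irpa",
    dtype=torch.bfloat16,
)

# 2. Reload that dataset and export an MLIR module with one forward_bs{N}
#    entry point per requested batch size.
export_clip_text_model_mlir(
    "clip_text.irpa",
    batch_sizes=[1, 4],
    mlir_output_path="clip_text.mlir",
)

Since each batch size becomes its own exported program (forward_bs1, forward_bs4, ...), the resulting MLIR can be compiled once and invoked at any of the listed batch sizes.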