Skip to content

Commit 8907a70

Browse files
authoredMar 24, 2025
New HunyuanVideo-I2V (#11066)
* update * update * update * add tests * update docs * raise value error * warning for true cfg and guidance scale * fix test
1 parent 5dbe4f5 commit 8907a70

File tree

6 files changed

+562
-44
lines changed

6 files changed

+562
-44
lines changed
 

‎docs/source/en/api/pipelines/hunyuan_video.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ The following models are available for the image-to-video pipeline:
5050
| Model name | Description |
5151
|:---|:---|
5252
| [`Skywork/SkyReels-V1-Hunyuan-I2V`](https://huggingface.co/Skywork/SkyReels-V1-Hunyuan-I2V) | Skywork's custom finetune of HunyuanVideo (de-distilled). Performs best at `97x544x960` resolution, `guidance_scale=1.0`, `true_cfg_scale=6.0` and a negative prompt. |
53-
| [`hunyuanvideo-community/HunyuanVideo-I2V`](https://huggingface.co/hunyuanvideo-community/HunyuanVideo-I2V) | Tecent's official HunyuanVideo I2V model. Performs best at resolutions of 480, 720, 960, 1280. A higher `shift` value when initializing the scheduler is recommended (good values are between 7 and 20) |
53+
| [`hunyuanvideo-community/HunyuanVideo-I2V-33ch`](https://huggingface.co/hunyuanvideo-community/HunyuanVideo-I2V-33ch) | Tencent's official HunyuanVideo 33-channel I2V model. Performs best at resolutions of 480, 720, 960, 1280. A higher `shift` value when initializing the scheduler is recommended (good values are between 7 and 20). |
54+
| [`hunyuanvideo-community/HunyuanVideo-I2V`](https://huggingface.co/hunyuanvideo-community/HunyuanVideo-I2V) | Tencent's official HunyuanVideo 16-channel I2V model. Performs best at resolutions of 480, 720, 960, 1280. A higher `shift` value when initializing the scheduler is recommended (good values are between 7 and 20). |
5455

5556
## Quantization
5657

‎scripts/convert_hunyuan_video_to_diffusers.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,9 @@ def remap_single_transformer_blocks_(key, state_dict):
160160
"pooled_projection_dim": 768,
161161
"rope_theta": 256.0,
162162
"rope_axes_dim": (16, 56, 56),
163+
"image_condition_type": None,
163164
},
164-
"HYVideo-T/2-I2V": {
165+
"HYVideo-T/2-I2V-33ch": {
165166
"in_channels": 16 * 2 + 1,
166167
"out_channels": 16,
167168
"num_attention_heads": 24,
@@ -178,6 +179,26 @@ def remap_single_transformer_blocks_(key, state_dict):
178179
"pooled_projection_dim": 768,
179180
"rope_theta": 256.0,
180181
"rope_axes_dim": (16, 56, 56),
182+
"image_condition_type": "latent_concat",
183+
},
184+
"HYVideo-T/2-I2V-16ch": {
185+
"in_channels": 16,
186+
"out_channels": 16,
187+
"num_attention_heads": 24,
188+
"attention_head_dim": 128,
189+
"num_layers": 20,
190+
"num_single_layers": 40,
191+
"num_refiner_layers": 2,
192+
"mlp_ratio": 4.0,
193+
"patch_size": 2,
194+
"patch_size_t": 1,
195+
"qk_norm": "rms_norm",
196+
"guidance_embeds": True,
197+
"text_embed_dim": 4096,
198+
"pooled_projection_dim": 768,
199+
"rope_theta": 256.0,
200+
"rope_axes_dim": (16, 56, 56),
201+
"image_condition_type": "token_replace",
181202
},
182203
}
183204

0 commit comments

Comments
 (0)