This repository was archived by the owner on Oct 31, 2023. It is now read-only.

Commit 3c6628a

Author: Alexander Richard (committed)
Commit message: added source code
1 parent 347c62a, commit 3c6628a

12 files changed: 37934 additions & 0 deletions

animate_face.py

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
"""
Copyright (c) Facebook, Inc. and its affiliates.
All rights reserved.
This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""

import argparse
import numpy as np
import torch as th

from utils.renderer import Renderer
from utils.helpers import smooth_geom, load_mask, get_template_verts, load_audio, audio_chunking
from models.vertex_unet import VertexUnet
from models.context_model import ContextModel
from models.encoders import MultimodalEncoder


parser = argparse.ArgumentParser()
parser.add_argument("--model_dir",
                    type=str,
                    default="pretrained_models",
                    help="directory containing the models to load")
parser.add_argument("--audio_file",
                    type=str,
                    default="assets/example_sentence.wav",
                    help="wave file to use for face animation"
                    )
parser.add_argument("--face_template",
                    type=str,
                    default="assets/face_template.obj",
                    help=".obj file containing neutral template mesh"
                    )
parser.add_argument("--output",
                    type=str,
                    default="video.mp4",
                    help="video output file"
                    )
args = parser.parse_args()

"""
load assets
"""
print("load assets...")
template_verts = get_template_verts(args.face_template)
audio = load_audio(args.audio_file)
mean = th.from_numpy(np.load("assets/face_mean.npy"))
stddev = th.from_numpy(np.load("assets/face_std.npy"))
forehead_mask = th.from_numpy(load_mask("assets/forehead_mask.txt", dtype=np.float32)).cuda()
neck_mask = th.from_numpy(load_mask("assets/neck_mask.txt", dtype=np.float32)).cuda()

renderer = Renderer("assets/face_template.obj")

"""
load models
"""
print("load models...")
geom_unet = VertexUnet(classes=128,
                       heads=16,
                       n_vertices=6172,
                       mean=mean,
                       stddev=stddev,
                       )
geom_unet.load(args.model_dir)
geom_unet.cuda().eval()
context_model = ContextModel(classes=128,
                             heads=16,
                             audio_dim=128
                             )
context_model.load(args.model_dir)
context_model.cuda().eval()
encoder = MultimodalEncoder(classes=128,
                            heads=16,
                            expression_dim=128,
                            audio_dim=128,
                            n_vertices=6172,
                            mean=mean,
                            stddev=stddev,
                            )
encoder.load(args.model_dir)
encoder.cuda().eval()

"""
generate and render sequence
"""
print("animate face mesh...")
# run template mesh and audio through networks
audio = audio_chunking(audio, frame_rate=30, chunk_size=16000)
with th.no_grad():
    audio_enc = encoder.audio_encoder(audio.cuda().unsqueeze(0))["code"]
    one_hot = context_model.sample(audio_enc, argmax=False)["one_hot"]
    T = one_hot.shape[1]
    geom = template_verts.cuda().view(1, 1, 6172, 3).expand(-1, T, -1, -1).contiguous()
    result = geom_unet(geom, one_hot)["geom"].squeeze(0)
# smooth results
result = smooth_geom(result, forehead_mask)
result = smooth_geom(result, neck_mask)
# render sequence
print("render...")
renderer.to_video(result, args.audio_file, args.output)
print("done")

assets/example_sentence.wav (594 KB): binary file not shown.

assets/face_mean.npy (72.5 KB): binary file not shown.

assets/face_std.npy (72.5 KB): binary file not shown.

assets/face_template.obj

Lines changed: 24676 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments
