-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathaudio2face.py
52 lines (45 loc) · 1.78 KB
/
audio2face.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Speech-to-Audio2Face module that uses the gRPC protocol helpers from audio2face_streaming_utils
import riva.client
import io
from pydub import AudioSegment
from scipy.io.wavfile import read
import numpy as np
from audio2face_streaming_utils import push_audio_track
class Audio2FaceService:
    """Convert raw TTS audio to float32 samples and push it to Audio2Face.

    The service holds the gRPC endpoint, the sample rate reported to
    Audio2Face and the prim path of the streaming player instance, and
    forwards audio through ``push_audio_track`` from
    ``audio2face_streaming_utils``.
    """

    def __init__(self, sample_rate=44100):
        """
        :param sample_rate: sample rate handed to ``push_audio_track`` when audio is sent
        """
        self.a2f_url = 'localhost:50051'  # Set it to the port of your local host
        # Honor the caller's value; previously this was hard-coded to 44100
        # and the constructor argument was silently ignored.
        self.sample_rate = sample_rate
        self.avatar_instance = '/World/audio2face/PlayerStreaming'  # Set it to the name of your Audio2Face Streaming Instance

    def tts_to_wav(self, tts_byte, framerate=22050) -> np.ndarray:
        """
        Decode raw TTS bytes into a sample array by round-tripping through WAV.

        The input is interpreted as mono audio with 2-byte samples.

        :param tts_byte: tts data in byte (raw, header-less audio)
        :param framerate: frame rate of the raw tts data
        :return: samples read back from the in-memory WAV file
        """
        seg = AudioSegment.from_raw(
            io.BytesIO(tts_byte),
            sample_width=2,
            # Use the argument; previously 22050 was hard-coded here.
            frame_rate=framerate,
            channels=1,
        )
        wav_io = io.BytesIO()
        seg.export(wav_io, format="wav")
        # Re-wrap the exported bytes so reading always starts at offset 0,
        # regardless of where export() left the stream position.
        _rate, wav = read(io.BytesIO(wav_io.getvalue()))
        return wav

    def wav_to_numpy_float32(self, wav_byte) -> np.ndarray:
        """
        Convert a sample array to C-ordered float32 and divide by 32768.0.

        :param wav_byte: sample array (assumed to be int16 PCM as returned by tts_to_wav)
        :return: float32 array of the scaled samples
        """
        return wav_byte.astype(np.float32, order='C') / 32768.0

    def get_tts_numpy_audio(self, audio) -> np.ndarray:
        """
        Run raw TTS bytes through tts_to_wav and wav_to_numpy_float32.

        :param audio: raw tts data in byte, decoded with tts_to_wav's default framerate
        :return: float32 array of the audio
        """
        wav_byte = self.tts_to_wav(audio)
        return self.wav_to_numpy_float32(wav_byte)

    def make_avatar_speaks(self, audio) -> None:
        """
        Convert the TTS audio and push it to the configured Audio2Face instance.

        :param audio: tts audio (raw bytes)
        :return: None
        """
        # NOTE(review): the audio is decoded with tts_to_wav's default
        # framerate (22050) but announced with self.sample_rate (default
        # 44100) - confirm that these are meant to differ.
        push_audio_track(self.a2f_url, self.get_tts_numpy_audio(audio), self.sample_rate, self.avatar_instance)