Skip to content

Commit 7070eb8

Browse files
committed
Make it a bit easier to swap in ElevenLabs voices
1 parent 89bd122 commit 7070eb8

File tree

3 files changed

+36
-13
lines changed

3 files changed

+36
-13
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ recordings/
44
transcripts/
55
.env
66
__pycache__/
7+
.mypy_cache/

main.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import os
22
import datetime
33
from openai import OpenAI
4-
client = OpenAI()
4+
from elevenlabs.client import ElevenLabs, Voice
5+
from elevenlabs import stream
56
import argparse
67
from dataclasses import asdict
78
from models import Message
@@ -12,20 +13,25 @@
1213
import dotenv
1314
dotenv.load_dotenv('.env')
1415

16+
oai_client = OpenAI()
17+
elevenlabs_client = ElevenLabs()
18+
1519
CHAT_MODEL = "gpt-4o"
1620
TTS_MODEL = "tts-1"
1721
MODEL_TEMPERATURE = 0.5
1822
AUDIO_MODEL = "whisper-1"
19-
VOICE_MODEL = "alloy"
23+
VOICE_ID = os.getenv("ELEVENLABS_VOICE_ID")
2024

2125
def ask_gpt_chat(prompt: str, messages: list[Message]):
2226
"""Returns ChatGPT's response to the given prompt."""
2327
system_message = [{"role": "system", "content": prompt}]
2428
message_dicts = [asdict(message) for message in messages]
2529
conversation_messages = system_message + message_dicts
26-
response = client.chat.completions.create(model=CHAT_MODEL,
27-
messages=conversation_messages,
28-
temperature=MODEL_TEMPERATURE)
30+
response = oai_client.chat.completions.create(
31+
model=CHAT_MODEL,
32+
messages=conversation_messages,
33+
temperature=MODEL_TEMPERATURE
34+
)
2935
return response.choices[0].message.content
3036

3137
def setup_prompt(prompt_file: str = 'prompts/vet_prompt.md') -> str:
@@ -37,7 +43,7 @@ def setup_prompt(prompt_file: str = 'prompts/vet_prompt.md') -> str:
3743

3844
def get_transcription(file_path: str):
3945
audio_file= open(file_path, "rb")
40-
transcription = client.audio.transcriptions.create(
46+
transcription = oai_client.audio.transcriptions.create(
4147
model=AUDIO_MODEL,
4248
file=audio_file
4349
)
@@ -64,17 +70,27 @@ def record():
6470
f.write(transcript)
6571
return transcript
6672

67-
def text_to_speech(text: str):
73+
def oai_text_to_speech(text: str):
6874
timestamp = datetime.datetime.now().timestamp()
6975
speech_file_path = Path(__file__).parent / f"outputs/{timestamp}.mp3"
70-
response = client.audio.speech.create(
76+
response = oai_client.audio.speech.create(
7177
model=TTS_MODEL,
72-
voice=VOICE_MODEL,
78+
voice="nova",
7379
input=text
7480
)
7581
response.write_to_file(speech_file_path)
7682
return speech_file_path
7783

84+
def elevenlabs_text_to_speech(text: str):
85+
audio_stream = elevenlabs_client.generate(
86+
text=text,
87+
voice=Voice(
88+
voice_id=VOICE_ID
89+
),
90+
stream=True
91+
)
92+
stream(audio_stream)
93+
7894
def clean_up():
7995
logging.info('Exiting...')
8096
# Delete all the recordings and transcripts
@@ -93,8 +109,10 @@ def clean_up():
93109
if __name__ == "__main__":
94110
parser = argparse.ArgumentParser()
95111
parser.add_argument("-pf", "--prompt_file", help="Specify the prompt file to use.", type=str)
112+
parser.add_argument("-tts", "--tts_type", help="Specify the TTS type to use.", type=str, default="openai", choices=["openai", "elevenlabs"])
96113
args = parser.parse_args()
97114
prompt_file = args.prompt_file
115+
tts_type = args.tts_type or "openai"
98116

99117
prompt = setup_prompt(prompt_file)
100118
conversation_messages = []
@@ -106,9 +124,12 @@ def clean_up():
106124
answer = ask_gpt_chat(prompt, conversation_messages)
107125
logging.info(f'Caller: {answer}')
108126
logging.info('Playing audio...')
109-
audio_file = text_to_speech(answer)
110-
# Play the audio file
111-
os.system(f"afplay {audio_file}")
127+
if tts_type == "elevenlabs":
128+
elevenlabs_text_to_speech(answer)
129+
else:
130+
audio_file = oai_text_to_speech(answer)
131+
# Play the audio file
132+
os.system(f"afplay {audio_file}")
112133
conversation_messages.append(Message(role="assistant", content=answer))
113134
if 'bye' in user_input.lower():
114135
clean_up()

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
openai==1.30.3
22
SpeechRecognition==3.9.0
3-
PyAudio==0.2.13
3+
PyAudio==0.2.13
4+
elevenlabs==1.2.2

0 commit comments

Comments
 (0)