app.py
import streamlit as st
import asyncio
from utils.audio_processing import capture_and_transcribe_audio
from utils.llm_interaction import generate_llm_response
from utils.tts_conversion import convert_text_to_speech, play_audio
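
# NOTE: the project-local helpers above are assumed to behave roughly as follows
# (inferred from how they are used below; actual signatures live in utils/):
#   capture_and_transcribe_audio()  async -> str   (empty/None when nothing was heard)
#   generate_llm_response(text, context) -> str
#   convert_text_to_speech(text, voice=..., pitch=..., rate=...)  async -> audio file path
#   play_audio(path)  plays the file; may raise, hence the try/except below
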
st.set_page_config(page_title="Voice Assistant", layout="wide")
# Initialize session state for conversation history
if "conversation_history" not in st.session_state:
st.session_state.conversation_history = []
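# (st.session_state persists across Streamlit's script reruns, so the
# conversation history survives each button click within a session.)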

# Sidebar controls for tunable TTS parameters
st.sidebar.title("Settings")
pitch = st.sidebar.slider("Pitch", -10, 10, 0, 1)  # pitch offset, applied below in Hz
speed = st.sidebar.slider("Speed", -50, 50, 0, 1)  # rate offset, applied below in percent
voice = st.sidebar.selectbox("Voice", ["en-US-JennyNeural", "en-US-GuyNeural"])

# Header
st.title("🎙️ Voice Assistant")
st.write("Click the button below to start speaking. The assistant will listen, transcribe, respond, and speak back to you.")

# Placeholders for dynamic UI updates
status_placeholder = st.empty()
transcription_placeholder = st.empty()
response_placeholder = st.empty()
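# (st.empty() creates single-slot containers whose contents the steps below
# overwrite in place as the interaction progresses.)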

# Function to update conversation history
def update_conversation(user_text, assistant_text):
    st.session_state.conversation_history.append({"User": user_text, "Assistant": assistant_text})

# Function to display conversation history
def display_conversation():
    conversation_display = ""
    for entry in st.session_state.conversation_history:
        conversation_display += f"**You**: {entry.get('User', '')}\n\n"
        conversation_display += f"**Assistant**: {entry.get('Assistant', '')}\n\n"
    st.markdown(conversation_display)

# Start interaction
if st.button("Start Speaking"):
    status_placeholder.text("Listening... 🎙️")
    image_placeholder = st.image("assets/listening_audio.gif", width=150, caption="Listening...")  # adjust width as needed

    # Step 1: Capture and transcribe speech
    transcribed_text = asyncio.run(capture_and_transcribe_audio())
    image_placeholder.empty()  # clear the listening animation

    if not transcribed_text:
        status_placeholder.text("Please try speaking again.")
    else:
        transcription_placeholder.markdown(f"**You said**: {transcribed_text}")
        status_placeholder.text("Generating response... 🤖")

        # Step 2: Generate a response from the LLM, passing only the prior
        # user inputs (not assistant replies) as conversation context
        user_inputs = ' '.join(entry['User'] for entry in st.session_state.conversation_history)
        response = generate_llm_response(transcribed_text, user_inputs)
        response_placeholder.markdown(f"**Assistant**: {response}")

        # Step 3: Convert the response text to speech and play it.
        # The TTS helper takes signed offset strings (e.g. "+5Hz", "-10%"),
        # so non-negative values need an explicit "+"; negatives already carry "-".
        pitch_str = f"+{pitch}" if pitch >= 0 else str(pitch)
        speed_str = f"+{speed}" if speed >= 0 else str(speed)
        audio_file = asyncio.run(convert_text_to_speech(response, voice=voice, pitch=f"{pitch_str}Hz", rate=f"{speed_str}%"))
        try:
            play_audio(audio_file)
        except Exception as e:
            st.error(f"Error playing audio: {e}")

        # Update conversation history with both the user and assistant turns
        update_conversation(transcribed_text, response)
        status_placeholder.text("Interaction completed. Click the button to start again.")

# Display the conversation history once, outside the button block,
# so it is rendered on every script rerun
display_conversation()
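
# To launch the app (assuming Streamlit and the utils package are installed):
#   streamlit run app.py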