Skip to content

Commit

Permalink
chat: record button constantly listens
Browse files Browse the repository at this point in the history
  • Loading branch information
rmackay9 committed Dec 21, 2023
1 parent c293d04 commit b57b4a9
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 37 deletions.
115 changes: 100 additions & 15 deletions MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
'''
AI Chat Module voice-to-text class
Randy Mackay, December 2023
Audio threshold algorithm courtesy of Primusa on StackOverflow: https://stackoverflow.com/questions/18406570/python-record-audio-on-detected-sound
'''

import time
import math
import struct

try:
import pyaudio # install using, "sudo apt-get install python3-pyaudio"
Expand All @@ -19,6 +23,15 @@ def __init__(self):
self.client = None
self.assistant = None

# initialise audio recording
self.p = pyaudio.PyAudio()

# flag to enable/disable listening and recording
self.listening_and_recording_enabled = False

# audio stream is opened during listening and closed at the end of recording
self.stream = None

# set the OpenAI API key
def set_api_key(self, api_key_str):
self.client = OpenAI(api_key = api_key_str)
Expand All @@ -37,34 +50,84 @@ def check_connection(self):
# return True if connected
return self.client is not None

# record audio from microphone
# returns filename of recording or None if failed
def record_audio(self):
# Initialize PyAudio
p = pyaudio.PyAudio()
# listen for noise
# returns true if noise is detected, false if not
def listen_for_noise(self):
    """Open the microphone stream and block until noise is heard or listening is stopped.

    Returns True when a frame exceeded the volume threshold while listening was
    still enabled; self.stream is left open in that case so that record_audio()
    can carry on reading from it.  Returns False if pyaudio is not initialised,
    the microphone could not be opened or read, or listening was disabled via
    stop_listening_or_recording(); any stream opened here is then closed.
    """
    # check pyaudio is initialised
    if self.p is None:
        print("chat: pyaudio not initialised")
        return False

    # open stream (kept on self so record_audio() can reuse it)
    try:
        self.stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
    except Exception:
        # Exception rather than a bare except so Ctrl-C / SystemExit still propagate
        print("chat: failed to connect to microphone")
        return False

    # enable listening and recording
    self.listening_and_recording_enabled = True

    noise_detected = False
    try:
        # throw away first 2 seconds of audio which can be noisy
        for _ in range(int(44100 / 1024 * 2)):
            self.stream.read(1024)

        # listen for noise until it is heard or the user disables listening
        while self.listening_and_recording_enabled and not noise_detected:
            data = self.stream.read(1024)
            if self.volume_over_threshold(data):
                noise_detected = True
    except OSError:
        # a failed read (e.g. input overflow) must not leave the stream open
        print("chat: failed to read from microphone")
        noise_detected = False

    # return true if listening enabled and noise detected
    if self.listening_and_recording_enabled and noise_detected:
        return True

    # stop and close the stream, and drop the reference to the closed stream
    self.stream.stop_stream()
    self.stream.close()
    self.stream = None
    return False

# stop listening for noise or recording audio
# clears the flag that the listen_for_noise() and record_audio() loops poll,
# so whichever loop is currently running exits after its current read
def stop_listening_or_recording(self):
self.listening_and_recording_enabled = False

# record audio from microphone. should only be called after listen_for_noise() returns true
# returns filename of recording on success
# returns None if failed to record or recording volume was below threshold (e.g. nothing recorded)
def record_audio(self):
# check pyaudio is initialised
if self.p is None:
print("chat: pyaudio not initialised")
return None

# calculate time recording should stop
# check stream is open and active
if self.stream is None or not self.stream.is_active() or self.stream.is_stopped():
print("chat: failed to connect to microphone")
return None

# record for at least 2 seconds
curr_time = time.time()
time_stop = curr_time + 5
stop_time = curr_time + 2

# record until specified time
# record until no noise is heard for 2 seconds or user unchecks the record button
noise_heard = False
frames = []
while curr_time < time_stop:
data = stream.read(1024)
while curr_time < stop_time and self.listening_and_recording_enabled:
data = self.stream.read(1024)
frames.append(data)
curr_time = time.time()
if self.volume_over_threshold(data):
noise_heard = True
stop_time = curr_time + 2

# if no noise was heard, return None
if not noise_heard:
return None

# Stop and close the stream
stream.stop_stream()
stream.close()
p.terminate()
# stop and close the stream
self.stream.stop_stream()
self.stream.close()

# Save audio file
wf = wave.open("recording.wav", "wb")
Expand All @@ -89,3 +152,25 @@ def convert_audio_to_text(self, audio_filename):
file=audio_file,
response_format="text")
return transcript

# return true if the volume of a frame of audio is above a given threshold
@staticmethod
def volume_over_threshold(frame, threshold = 10):
# calculate number of samples in the frame
num_samples = len(frame) / pyaudio.PyAudio().get_sample_size(pyaudio.paInt16)

# protect against divide by zero
if num_samples == 0:
return False
format = "%dh" % (num_samples)
shorts = struct.unpack(format, frame)

# iterate over the frame and calculate the RMS volume
sum_squares = 0.0
for sample in shorts:
n = sample * (1.0/32768.0)
sum_squares += n*n
volume_rms = math.sqrt(sum_squares / num_samples) * 1000

# return true if volume is above threshold
return volume_rms >= threshold
53 changes: 31 additions & 22 deletions MAVProxy/modules/mavproxy_chat/chat_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def __init__(self, mpstate):
self.horiz_sizer = wx.BoxSizer(wx.HORIZONTAL)

# add a record button
self.record_button = wx.Button(self.frame, id=-1, label="Rec", size=(75, 25))
self.frame.Bind(wx.EVT_BUTTON, self.record_button_click, self.record_button)
self.record_button = wx.ToggleButton(self.frame, id=-1, label="Rec", size=(75, 25))
self.frame.Bind(wx.EVT_TOGGLEBUTTON, self.record_button_click, self.record_button)
self.horiz_sizer.Add(self.record_button, proportion = 0, flag = wx.ALIGN_TOP | wx.ALL, border = 5)

# add an input text box
Expand Down Expand Up @@ -110,29 +110,38 @@ def apikey_close_button_click(self, event):

# record button clicked
def record_button_click(self, event):
    """Handle a toggle of the record button.

    When the button has just been pressed, start record_button_click_execute
    in a single worker thread.  When the button has just been released, ask
    the voice-to-text object to stop listening or recording; no thread is
    started in that case.
    """
    if self.record_button.GetValue():
        # run record_button_click_execute in a new thread
        th = Thread(target=self.record_button_click_execute, args=(event,))
        th.start()
    else:
        self.chat_voice_to_text.stop_listening_or_recording()

# record button clicked
def record_button_click_execute(self, event):
    """Listen, record, transcribe and send in a loop; started by record_button_click in a worker thread.

    Each pass waits for noise, records audio, converts the recording to text,
    places the text in the input box and sends it to the assistant.  The loop
    ends when listen_for_noise() returns False or when audio-to-text conversion
    fails; in both cases the record button is un-toggled so it does not stay
    pressed while nothing is listening.
    """
    while True:
        # listen for noise
        if not self.chat_voice_to_text.listen_for_noise():
            # exit if listening failed or user unclicked the record button
            self.set_status_text("no noise detected")
            wx.CallAfter(self.record_button.SetValue, False)
            return

        # noises heard, record audio
        rec_filename = self.chat_voice_to_text.record_audio()
        if rec_filename is None:
            # audio was not recorded so return to listening
            continue

        # convert audio to text and place in input box
        text = self.chat_voice_to_text.convert_audio_to_text(rec_filename)
        if text is None:
            self.set_status_text("audio to text conversion failed")
            # the loop is ending, so release the toggle button as well
            wx.CallAfter(self.record_button.SetValue, False)
            return
        # widget updates are marshalled to the GUI thread via wx.CallAfter
        wx.CallAfter(self.text_input.SetValue, text)

        # send text to assistant
        self.send_text_to_assistant()

# send button clicked
def send_button_click(self, event):
Expand Down

0 comments on commit b57b4a9

Please sign in to comment.