Skip to content

Commit b57b4a9

Browse files
committed
chat: record button constantly listens
1 parent c293d04 commit b57b4a9

File tree

2 files changed

+131
-37
lines changed

2 files changed

+131
-37
lines changed

MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py

Lines changed: 100 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
'''
22
AI Chat Module voice-to-text class
33
Randy Mackay, December 2023
4+
5+
Audio threshold algorithm courtesy of Primusa on StackOverflow: https://stackoverflow.com/questions/18406570/python-record-audio-on-detected-sound
46
'''
57

68
import time
9+
import math
10+
import struct
711

812
try:
913
import pyaudio # install using, "sudo apt-get install python3-pyaudio"
@@ -19,6 +23,15 @@ def __init__(self):
1923
self.client = None
2024
self.assistant = None
2125

26+
# initialise audio recording
27+
self.p = pyaudio.PyAudio()
28+
29+
# flag to enable/disable listening and recording
30+
self.listening_and_recording_enabled = False
31+
32+
# audio stream is opened during listening and closed at the end of recording
33+
self.stream = None
34+
2235
# set the OpenAI API key
2336
def set_api_key(self, api_key_str):
2437
self.client = OpenAI(api_key = api_key_str)
@@ -37,34 +50,84 @@ def check_connection(self):
3750
# return True if connected
3851
return self.client is not None
3952

40-
# record audio from microphone
41-
# returns filename of recording or None if failed
42-
def record_audio(self):
43-
# Initialize PyAudio
44-
p = pyaudio.PyAudio()
53+
# listen for noise
54+
# returns true if noise is detected, false if not
55+
def listen_for_noise(self):
56+
# check pyaudio is initialised
57+
if self.p is None:
58+
print("chat: pyaudio not initialised")
59+
return False
4560

4661
# Open stream
4762
try:
48-
stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
63+
self.stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
4964
except:
5065
print("chat: failed to connect to microphone")
66+
return False
67+
68+
# enable listening and recording
69+
self.listening_and_recording_enabled = True
70+
71+
# throw away first 2 seconds of audio which can be noisy
72+
for i in range(0, int(44100 / 1024 * 2)):
73+
self.stream.read(1024)
74+
75+
# listen for noise
76+
noise_detected = False
77+
while self.listening_and_recording_enabled and not noise_detected:
78+
data = self.stream.read(1024)
79+
if self.volume_over_threshold(data):
80+
noise_detected = True
81+
82+
# return true if listening is enabled and noise was detected
83+
if self.listening_and_recording_enabled and noise_detected:
84+
return True
85+
86+
# stop and close the stream
87+
self.stream.stop_stream()
88+
self.stream.close()
89+
return False
90+
91+
# stop listening for noise or recording audio
92+
def stop_listening_or_recording(self):
93+
self.listening_and_recording_enabled = False
94+
95+
# record audio from microphone. should only be called after listen_for_noise() returns true
96+
# returns filename of recording on success
97+
# returns None if failed to record or recording volume was below threshold (i.e. nothing recorded)
98+
def record_audio(self):
99+
# check pyaudio is initialised
100+
if self.p is None:
101+
print("chat: pyaudio not initialised")
51102
return None
52103

53-
# calculate time recording should stop
104+
# check stream is open and active
105+
if self.stream is None or not self.stream.is_active() or self.stream.is_stopped():
106+
print("chat: failed to connect to microphone")
107+
return None
108+
109+
# record for at least 2 seconds
54110
curr_time = time.time()
55-
time_stop = curr_time + 5
111+
stop_time = curr_time + 2
56112

57-
# record until specified time
113+
# record until no noise is heard for 2 seconds or user unchecks the record button
114+
noise_heard = False
58115
frames = []
59-
while curr_time < time_stop:
60-
data = stream.read(1024)
116+
while curr_time < stop_time and self.listening_and_recording_enabled:
117+
data = self.stream.read(1024)
61118
frames.append(data)
62119
curr_time = time.time()
120+
if self.volume_over_threshold(data):
121+
noise_heard = True
122+
stop_time = curr_time + 2
123+
124+
# if no noise was heard, return None
125+
if not noise_heard:
126+
return None
63127

64-
# Stop and close the stream
65-
stream.stop_stream()
66-
stream.close()
67-
p.terminate()
128+
# stop and close the stream
129+
self.stream.stop_stream()
130+
self.stream.close()
68131

69132
# Save audio file
70133
wf = wave.open("recording.wav", "wb")
@@ -89,3 +152,25 @@ def convert_audio_to_text(self, audio_filename):
89152
file=audio_file,
90153
response_format="text")
91154
return transcript
155+
156+
# return true if the volume of a frame of audio is above a given threshold
157+
@staticmethod
158+
def volume_over_threshold(frame, threshold = 10):
    """Return True if the RMS volume of a frame of audio is at or above threshold.

    frame: bytes holding signed 16-bit samples in native byte order
    threshold: RMS level to compare against; the RMS of the samples
        (each normalised by 1/32768) is multiplied by 1000 before comparing

    Returns False for an empty frame (or one shorter than a single sample).
    """
    # size in bytes of one signed 16-bit sample.  computed with struct rather than
    # by creating a new pyaudio.PyAudio() instance on every frame, which was slow
    # and never terminated
    sample_size = struct.calcsize("h")

    # calculate number of whole samples in the frame
    num_samples = len(frame) // sample_size

    # protect against divide by zero
    if num_samples == 0:
        return False

    # unpack whole samples only, ignoring any trailing partial sample
    shorts = struct.unpack("%dh" % num_samples, frame[:num_samples * sample_size])

    # iterate over the frame and calculate the RMS volume
    sum_squares = 0.0
    for sample in shorts:
        n = sample * (1.0/32768.0)
        sum_squares += n*n
    volume_rms = math.sqrt(sum_squares / num_samples) * 1000

    # return true if volume is above threshold
    return volume_rms >= threshold

MAVProxy/modules/mavproxy_chat/chat_window.py

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ def __init__(self, mpstate):
4949
self.horiz_sizer = wx.BoxSizer(wx.HORIZONTAL)
5050

5151
# add a record button
52-
self.record_button = wx.Button(self.frame, id=-1, label="Rec", size=(75, 25))
53-
self.frame.Bind(wx.EVT_BUTTON, self.record_button_click, self.record_button)
52+
self.record_button = wx.ToggleButton(self.frame, id=-1, label="Rec", size=(75, 25))
53+
self.frame.Bind(wx.EVT_TOGGLEBUTTON, self.record_button_click, self.record_button)
5454
self.horiz_sizer.Add(self.record_button, proportion = 0, flag = wx.ALIGN_TOP | wx.ALL, border = 5)
5555

5656
# add an input text box
@@ -110,29 +110,38 @@ def apikey_close_button_click(self, event):
110110

111111
# record button clicked
112112
def record_button_click(self, event):
113-
# run record_button_click_execute in a new thread
114-
th = Thread(target=self.record_button_click_execute, args=(event,))
115-
th.start()
113+
if self.record_button.GetValue():
114+
# run record_button_click_execute in a new thread
115+
th = Thread(target=self.record_button_click_execute, args=(event,))
116+
th.start()
117+
else:
118+
self.chat_voice_to_text.stop_listening_or_recording()
116119

117120
# record button clicked
118121
def record_button_click_execute(self, event):
119-
# record audio
120-
rec_filename = self.chat_voice_to_text.record_audio()
121-
if rec_filename is None:
122-
print("chat: audio recording failed")
123-
self.set_status_text("Audio recording failed")
124-
return
125-
126-
# convert audio to text and place in input box
127-
text = self.chat_voice_to_text.convert_audio_to_text(rec_filename)
128-
if text is None:
129-
print("chat: audio to text conversion failed")
130-
self.set_status_text("Audio to text conversion failed")
131-
return
132-
wx.CallAfter(self.text_input.SetValue, text)
133-
134-
# send text to assistant
135-
self.send_text_to_assistant()
122+
while True:
123+
# listen for noise
124+
if not self.chat_voice_to_text.listen_for_noise():
125+
# exit if listening failed or user unclicked the record button
126+
self.set_status_text("no noise detected")
127+
wx.CallAfter(self.record_button.SetValue, False)
128+
return
129+
130+
# noises heard, record audio
131+
rec_filename = self.chat_voice_to_text.record_audio()
132+
if rec_filename is None:
133+
# audio was not recorded so return to listening
134+
continue
135+
136+
# convert audio to text and place in input box
137+
text = self.chat_voice_to_text.convert_audio_to_text(rec_filename)
138+
if text is None:
139+
self.set_status_text("audio to text conversion failed")
140+
return
141+
wx.CallAfter(self.text_input.SetValue, text)
142+
143+
# send text to assistant
144+
self.send_text_to_assistant()
136145

137146
# send button clicked
138147
def send_button_click(self, event):

0 commit comments

Comments
 (0)