11'''
22AI Chat Module voice-to-text class
33Randy Mackay, December 2023
4+
5+ Audio threshold algorithm courtesy of Primusa on StackOverflow: https://stackoverflow.com/questions/18406570/python-record-audio-on-detected-sound
46'''
57
68import time
9+ import math
10+ import struct
711
812try :
913 import pyaudio # install using, "sudo apt-get install python3-pyaudio"
@@ -19,6 +23,15 @@ def __init__(self):
1923 self .client = None
2024 self .assistant = None
2125
26+ # initialise audio recording
27+ self .p = pyaudio .PyAudio ()
28+
29+ # flag to enable/disable listening and recording
30+ self .listening_and_recording_enabled = False
31+
32+ # audio stream is opened during listening and closed at the end of recording
33+ self .stream = None
34+
2235 # set the OpenAI API key
2336 def set_api_key (self , api_key_str ):
2437 self .client = OpenAI (api_key = api_key_str )
@@ -37,34 +50,84 @@ def check_connection(self):
3750 # return True if connected
3851 return self .client is not None
3952
40- # record audio from microphone
41- # returns filename of recording or None if failed
42- def record_audio (self ):
43- # Initialize PyAudio
44- p = pyaudio .PyAudio ()
53+ # listen for noise
54+ # returns true if noise is detected, false if not
def listen_for_noise(self):
    """Open the microphone and block until noise is heard or listening is stopped.

    Returns:
        True if noise was detected while listening was still enabled. The
        audio stream is left open in self.stream so that record_audio()
        can continue reading from it.
        False if pyaudio is not initialised, the microphone could not be
        opened, or listening was stopped before any noise was detected.
        In the latter case the stream is closed and self.stream is reset
        to None.

    Any exception raised while reading from the stream is propagated to the
    caller after the stream has been closed.
    """
    sample_rate = 44100
    frames_per_buffer = 1024

    # check pyaudio is initialised
    if self.p is None:
        print("chat: pyaudio not initialised")
        return False

    # open stream
    try:
        self.stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
                                  input=True, frames_per_buffer=frames_per_buffer)
    except Exception:
        # Exception rather than a bare except so that KeyboardInterrupt and
        # SystemExit are not swallowed
        print("chat: failed to connect to microphone")
        return False

    # enable listening and recording
    self.listening_and_recording_enabled = True

    keep_stream_open = False
    try:
        # throw away first 2 seconds of audio which can be noisy
        for _ in range(int(sample_rate / frames_per_buffer * 2)):
            self.stream.read(frames_per_buffer)

        # listen for noise
        noise_detected = False
        while self.listening_and_recording_enabled and not noise_detected:
            data = self.stream.read(frames_per_buffer)
            if self.volume_over_threshold(data):
                noise_detected = True

        # keep the stream open only if listening is still enabled and noise was detected
        keep_stream_open = bool(self.listening_and_recording_enabled and noise_detected)
    finally:
        if not keep_stream_open:
            # stop and close the stream (also runs if reading raised) and
            # forget it so record_audio()'s "stream is None" check rejects it
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

    return keep_stream_open
90+
91+ # stop listening for noise or recording audio
def stop_listening_or_recording(self):
    """Clear the flag that the listen_for_noise() and record_audio() loops poll.

    Both loops test self.listening_and_recording_enabled on every iteration,
    so clearing it makes them exit after the audio read in progress.
    """
    self.listening_and_recording_enabled = False
94+
95+ # record audio from microphone. should only be called after listen_for_noise() returns true
96+ # returns filename of recording on success
97+ # returns None if failed to record or recording volume was below threshold (e.g. nothing recorded)
98+ def record_audio (self ):
99+ # check pyaudio is initialised
100+ if self .p is None :
101+ print ("chat: pyaudio not initialised" )
51102 return None
52103
53- # calculate time recording should stop
104+ # check stream is open and active
105+ if self .stream is None or not self .stream .is_active () or self .stream .is_stopped ():
106+ print ("chat: failed to connect to microphone" )
107+ return None
108+
109+ # record for at least 2 seconds
54110 curr_time = time .time ()
55- time_stop = curr_time + 5
111+ stop_time = curr_time + 2
56112
57- # record until specified time
113+ # record until no noise is heard for 2 seconds or user unchecks the record button
114+ noise_heard = False
58115 frames = []
59- while curr_time < time_stop :
60- data = stream .read (1024 )
116+ while curr_time < stop_time and self . listening_and_recording_enabled :
117+ data = self . stream .read (1024 )
61118 frames .append (data )
62119 curr_time = time .time ()
120+ if self .volume_over_threshold (data ):
121+ noise_heard = True
122+ stop_time = curr_time + 2
123+
124+ # if no noise was heard, return None
125+ if not noise_heard :
126+ return None
63127
64- # Stop and close the stream
65- stream .stop_stream ()
66- stream .close ()
67- p .terminate ()
128+ # stop and close the stream
129+ self .stream .stop_stream ()
130+ self .stream .close ()
68131
69132 # Save audio file
70133 wf = wave .open ("recording.wav" , "wb" )
@@ -89,3 +152,25 @@ def convert_audio_to_text(self, audio_filename):
89152 file = audio_file ,
90153 response_format = "text" )
91154 return transcript
155+
156+ # return true if the volume of a frame of audio is above a given threshold
157+ @staticmethod
158+ def volume_over_threshold (frame , threshold = 10 ):
159+ # calculate number of samples in the frame
160+ num_samples = len (frame ) / pyaudio .PyAudio ().get_sample_size (pyaudio .paInt16 )
161+
162+ # protect against divide by zero
163+ if num_samples == 0 :
164+ return False
165+ format = "%dh" % (num_samples )
166+ shorts = struct .unpack (format , frame )
167+
168+ # iterate over the frame and calculate the RMS volume
169+ sum_squares = 0.0
170+ for sample in shorts :
171+ n = sample * (1.0 / 32768.0 )
172+ sum_squares += n * n
173+ volume_rms = math .sqrt (sum_squares / num_samples ) * 1000
174+
175+ # return true if volume is above threshold
176+ return volume_rms >= threshold
0 commit comments